• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files

snow.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #include "avcodec.h"
00022 #include "dsputil.h"
00023 #include "snow.h"
00024 
00025 #include "rangecoder.h"
00026 
00027 #include "mpegvideo.h"
00028 
00029 #undef NDEBUG
00030 #include <assert.h>
00031 
// 256-entry lookup table with three output levels (-1, 0, +1):
// indices 0 and 1 map to 0, indices 2..127 to +1, indices 128..254 to -1
// and index 255 to 0.
// NOTE(review): the layout suggests the index is a signed value wrapped to
// 8 bits (255 standing for -1) — confirm at the lookup sites.
00032 static const int8_t quant3[256]={
00033  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00034  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00035  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00036  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00037  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00038  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00039  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00040  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00041 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00042 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00043 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00044 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00045 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00046 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00047 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00048 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
00049 };
// 256-entry lookup table with three output levels (-1, 0, +1):
// index 0 maps to 0, indices 1..127 to +1 and indices 128..255 to -1.
// NOTE(review): reads like a sign function over a signed value wrapped to
// 8 bits — confirm at the lookup sites.
00050 static const int8_t quant3b[256]={
00051  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00052  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00053  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00054  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00055  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00056  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00057  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00058  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00059 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00060 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00061 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00062 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00063 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00064 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00065 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00066 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00067 };
// 256-entry lookup table with three output levels (-1, 0, +1):
// indices 0 and 1 map to 0; from index 2 onwards even indices map to +1 and
// odd indices to -1.
// NOTE(review): the alternating pattern suggests the index interleaves a
// magnitude with a sign bit in bit 0 — confirm at the lookup sites.
00068 static const int8_t quant3bA[256]={
00069  0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00070  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00071  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00072  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00073  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00074  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00075  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00076  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00077  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00078  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00079  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00080  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00081  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00082  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00083  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00084  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00085 };
// 256-entry lookup table with five output levels (-2..+2):
// index 0 maps to 0, indices 1..3 to +1, indices 4..127 to +2,
// indices 128..252 to -2 and indices 253..255 to -1.
// NOTE(review): the layout suggests the index is a signed value wrapped to
// 8 bits (256-d standing for -d) — confirm at the lookup sites.
00086 static const int8_t quant5[256]={
00087  0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00088  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00089  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00090  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00091  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00092  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00093  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00094  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00095 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00096 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00097 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00098 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00099 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
00103 };
// 256-entry lookup table with seven output levels (-3..+3).
// Indices 0..127 hold 0 followed by non-decreasing positive levels up to +3;
// indices 128..255 start at -3 and rise to -1 at index 255.
// NOTE(review): the layout suggests the index is a signed value wrapped to
// 8 bits (256-d standing for -d) — confirm at the lookup sites.
00104 static const int8_t quant7[256]={
00105  0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00106  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00107  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
00108  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00109  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00110  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00111  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00112  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
00119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
00121 };
// 256-entry lookup table with nine output levels (-4..+4).
// Indices 0..127 hold 0 followed by non-decreasing positive levels up to +4;
// indices 128..255 start at -4 and rise to -1 at index 255.
// NOTE(review): the layout suggests the index is a signed value wrapped to
// 8 bits (256-d standing for -d) — confirm at the lookup sites.
00122 static const int8_t quant9[256]={
00123  0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00124  3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00125  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00126  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00127  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00128  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00129  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00130  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
00138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
00139 };
// 256-entry lookup table with eleven output levels (-5..+5).
// Indices 0..127 hold 0 followed by non-decreasing positive levels up to +5;
// indices 128..255 start at -5 and rise to -1 at index 255.
// NOTE(review): the layout suggests the index is a signed value wrapped to
// 8 bits (256-d standing for -d) — confirm at the lookup sites.
00140 static const int8_t quant11[256]={
00141  0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
00142  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00143  4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00144  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00145  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00146  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00147  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00148  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
00155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
00157 };
// 256-entry lookup table with thirteen output levels (-6..+6).
// Indices 0..127 hold 0 followed by non-decreasing positive levels up to +6;
// indices 128..255 start at -6 and rise to -1 at index 255.
// NOTE(review): the layout suggests the index is a signed value wrapped to
// 8 bits (256-d standing for -d) — confirm at the lookup sites.
00158 static const int8_t quant13[256]={
00159  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
00160  4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00161  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00162  5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00163  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00164  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00165  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00166  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
00172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
00175 };
00176 
00177 #if 0 //64*cubic
00178 static const uint8_t obmc32[1024]={
00179   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00180   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
00181   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
00182   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
00183   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
00184   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
00185   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
00186   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
00187   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
00188   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
00189   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
00190   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
00191   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
00192   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
00193   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
00194   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
00195   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
00196   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
00197   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
00198   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
00199   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
00200   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
00201   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
00202   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
00203   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
00204   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
00205   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
00206   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
00207   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
00208   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
00209   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
00210   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00211 //error:0.000022
00212 };
00213 static const uint8_t obmc16[256]={
00214   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
00215   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
00216   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
00217   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
00218   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
00219   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
00220   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
00221   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
00222   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
00223   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
00224   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
00225   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
00226   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
00227   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
00228   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
00229   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
00230 //error:0.000033
00231 };
00232 #elif 1 // 64*linear
00233 static const uint8_t obmc32[1024]={
00234   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
00235   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
00236   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
00237   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
00238   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
00239   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
00240   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
00241   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
00242   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
00243   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
00244   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
00245   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
00246   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
00247   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
00248   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
00249   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
00250   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
00251   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
00252   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
00253   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
00254   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
00255   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
00256   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
00257   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
00258   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
00259   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
00260   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
00261   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
00262   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
00263   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
00264   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
00265   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
00266  //error:0.000020
00267 };
00268 static const uint8_t obmc16[256]={
00269   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
00270   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
00271   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
00272   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
00273   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
00274  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
00275  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
00276  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
00277  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
00278  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
00279  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
00280   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
00281   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
00282   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
00283   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
00284   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
00285 //error:0.000015
00286 };
00287 #else //64*cos
00288 static const uint8_t obmc32[1024]={
00289   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00290   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
00291   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
00292   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
00293   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
00294   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
00295   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
00296   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
00297   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
00298   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
00299   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
00300   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
00301   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
00302   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
00303   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
00304   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
00305   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
00306   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
00307   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
00308   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
00309   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
00310   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
00311   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
00312   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
00313   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
00314   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
00315   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
00316   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
00317   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
00318   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
00319   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
00320   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00321 //error:0.000022
00322 };
00323 static const uint8_t obmc16[256]={
00324   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
00325   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
00326   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
00327   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
00328   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
00329   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
00330   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
00331   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
00332   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
00333   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
00334   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
00335   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
00336   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
00337   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
00338   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
00339   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
00340 //error:0.000022
00341 };
00342 #endif /* 0 */
00343 
// 8x8 weight window stored row by row.
// The table is symmetric and separable: entry (y,x) equals w[y]*w[x] with
// w = {2, 6, 10, 14, 14, 10, 6, 2}; the elements of w sum to 64.
// NOTE(review): the name suggests overlapped-block motion compensation
// weights — confirm at the sites that read obmc_tab.
00344 //linear *64
00345 static const uint8_t obmc8[64]={
00346   4, 12, 20, 28, 28, 20, 12,  4,
00347  12, 36, 60, 84, 84, 60, 36, 12,
00348  20, 60,100,140,140,100, 60, 20,
00349  28, 84,140,196,196,140, 84, 28,
00350  28, 84,140,196,196,140, 84, 28,
00351  20, 60,100,140,140,100, 60, 20,
00352  12, 36, 60, 84, 84, 60, 36, 12,
00353   4, 12, 20, 28, 28, 20, 12,  4,
00354 //error:0.000000
00355 };
00356 
// 4x4 weight window stored row by row.
// The table is symmetric and separable: entry (y,x) equals w[y]*w[x] with
// w = {4, 12, 12, 4}; the elements of w sum to 32.
// NOTE(review): the name suggests overlapped-block motion compensation
// weights — confirm at the sites that read obmc_tab.
00357 //linear *64
00358 static const uint8_t obmc4[16]={
00359  16, 48, 48, 16,
00360  48,144,144, 48,
00361  48,144,144, 48,
00362  16, 48, 48, 16,
00363 //error:0.000000
00364 };
00365 
00366 static const uint8_t *obmc_tab[4]={
00367     obmc32, obmc16, obmc8, obmc4
00368 };
00369 
// File-scope square table indexed by two reference-frame indices; as a static
// object it starts out zeroed.
// NOTE(review): presumably holds motion-vector scale factors between pairs of
// reference frames and is filled in elsewhere in the file — confirm.
00370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
00371 
// Per-block record: two 16-bit motion components, a reference index, one byte
// per colour component, a flag byte and a level byte.
00372 typedef struct BlockNode{
00373     int16_t mx; // NOTE(review): presumably the horizontal motion vector component — confirm
00374     int16_t my; // NOTE(review): presumably the vertical motion vector component — confirm
00375     uint8_t ref; // NOTE(review): presumably an index into the reference frames — confirm
00376     uint8_t color[3]; // one value per colour component
00377     uint8_t type; // flag byte; the BLOCK_* values defined below are its bits
00378 //#define TYPE_SPLIT    1
00379 #define BLOCK_INTRA   1
00380 #define BLOCK_OPT     2
00381 //#define TYPE_NOCOLOR  4
00382     uint8_t level; //FIXME merge into type?
00383 }BlockNode;
00384 
// Constant default block: 128 in each colour component, every other member
// zero (zero motion, reference 0, no type flags, level 0).
// NOTE(review): presumably substituted for neighbours that lie outside the
// picture — confirm at the use sites.
00385 static const BlockNode null_block= { //FIXME add border maybe
00386     .color= {128,128,128},
00387     .mx= 0,
00388     .my= 0,
00389     .ref= 0,
00390     .type= 0,
00391     .level= 0,
00392 };
00393 
// Size and precision constants used throughout the file.
00394 #define LOG2_MB_SIZE 4
00395 #define MB_SIZE (1<<LOG2_MB_SIZE) // evaluates to 16
00396 #define ENCODER_EXTRA_BITS 4
00397 #define HTAPS_MAX 8 // the hcoeff arrays in Plane hold HTAPS_MAX/2 entries
00398 
// Pair of a signed 16-bit x value and an unsigned 16-bit coefficient.
// NOTE(review): presumably records the position and value of one coefficient
// within a line (see SubBand.x_coeff) — confirm.
00399 typedef struct x_and_coeff{
00400     int16_t x;
00401     uint16_t coeff;
00402 } x_and_coeff;
00403 
// Description of one subband: its geometry, its coefficient storage in both
// element types, a link to a parent subband and a block of adaptive state
// bytes.
00404 typedef struct SubBand{
00405     int level;
00406     int stride;
00407     int width;
00408     int height;
00409     int qlog;                                   // NOTE(review): presumably a quantiser on a log scale — confirm
00410     DWTELEM *buf;
00411     IDWTELEM *ibuf;
00412     int buf_x_offset;
00413     int buf_y_offset;
00414     int stride_line; // NOTE(review): presumably a stride counted in lines rather than elements — confirm
00415     x_and_coeff * x_coeff;
00416     struct SubBand *parent;
00417     uint8_t state[/*7*2*/ 7 + 512][32]; // 519 rows of 32 state bytes
00418 }SubBand;
00419 
// One picture plane: its dimensions, a grid of subbands (four per
// decomposition level) and filter settings, each paired with a last_* copy.
// NOTE(review): the last_* members presumably hold the previously coded
// settings so changes can be detected — confirm.
00420 typedef struct Plane{
00421     int width;
00422     int height;
00423     SubBand band[MAX_DECOMPOSITIONS][4];
00424 
00425     int htaps;
00426     int8_t hcoeff[HTAPS_MAX/2]; // half as many entries as the maximum tap count
00427     int diag_mc;
00428     int fast_mc;
00429 
00430     int last_htaps;
00431     int8_t last_hcoeff[HTAPS_MAX/2];
00432     int last_diag_mc;
00433 }Plane;
00434 
// Top-level codec state. The members fall roughly into: frame storage
// (AVFrame members and halfpel_plane), range-coder state (c, header_state,
// block_state), transform/quantiser settings that each have a last_* shadow
// copy, per-plane and per-block data, and motion-estimation scratch
// (me_cache, m).
// NOTE(review): this grouping is inferred from member names and types —
// confirm against the code that uses them.
00435 typedef struct SnowContext{
00436 //    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
00437 
00438     AVCodecContext *avctx;
00439     RangeCoder c;
00440     DSPContext dsp;
00441     AVFrame new_picture;
00442     AVFrame input_picture;              
00443     AVFrame current_picture;
00444     AVFrame last_picture[MAX_REF_FRAMES];
00445     uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
00446     AVFrame mconly_picture;
00447 //     uint8_t q_context[16];
00448     uint8_t header_state[32];
00449     uint8_t block_state[128 + 32*128];
00450     int keyframe;
00451     int always_reset;
00452     int version;
00453     int spatial_decomposition_type;
00454     int last_spatial_decomposition_type;
00455     int temporal_decomposition_type;
00456     int spatial_decomposition_count;
00457     int last_spatial_decomposition_count;
00458     int temporal_decomposition_count;
00459     int max_ref_frames;
00460     int ref_frames;
00461     int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; // per reference frame: pointer to an array of int16_t pairs
00462     uint32_t *ref_scores[MAX_REF_FRAMES];
00463     DWTELEM *spatial_dwt_buffer;
00464     IDWTELEM *spatial_idwt_buffer;
00465     int colorspace_type;
00466     int chroma_h_shift;
00467     int chroma_v_shift;
00468     int spatial_scalability;
00469     int qlog;
00470     int last_qlog;
00471     int lambda;
00472     int lambda2;
00473     int pass1_rc;
00474     int mv_scale;
00475     int last_mv_scale;
00476     int qbias;
00477     int last_qbias;
00478 #define QBIAS_SHIFT 3
00479     int b_width;
00480     int b_height;
00481     int block_max_depth;
00482     int last_block_max_depth;
00483     Plane plane[MAX_PLANES];
00484     BlockNode *block;
00485 #define ME_CACHE_SIZE 1024
00486     int me_cache[ME_CACHE_SIZE];
00487     int me_cache_generation;
00488     slice_buffer sb;
00489 
00490     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
00491 }SnowContext;
00492 
// Four IDWTELEM line pointers plus a row counter.
// NOTE(review): presumably the rolling state of a line-based inverse wavelet
// transform (the lines currently in flight and the next row to produce) —
// confirm at the use sites.
00493 typedef struct {
00494     IDWTELEM *b0;
00495     IDWTELEM *b1;
00496     IDWTELEM *b2;
00497     IDWTELEM *b3;
00498     int y;
00499 } dwt_compose_t;
00500 
00501 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
00502 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
00503 
// Forward declaration so that code placed above the definition can call it.
00504 static void iterative_me(SnowContext *s);
00505 
00506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
00507 {
00508     int i;
00509 
00510     buf->base_buffer = base_buffer;
00511     buf->line_count = line_count;
00512     buf->line_width = line_width;
00513     buf->data_count = max_allocated_lines;
00514     buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
00515     buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
00516 
00517     for (i = 0; i < max_allocated_lines; i++)
00518     {
00519         buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
00520     }
00521 
00522     buf->data_stack_top = max_allocated_lines - 1;
00523 }
00524 
00525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
00526 {
00527     int offset;
00528     IDWTELEM * buffer;
00529 
00530 //  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
00531 
00532     assert(buf->data_stack_top >= 0);
00533 //  assert(!buf->line[line]);
00534     if (buf->line[line])
00535         return buf->line[line];
00536 
00537     offset = buf->line_width * line;
00538     buffer = buf->data_stack[buf->data_stack_top];
00539     buf->data_stack_top--;
00540     buf->line[line] = buffer;
00541 
00542 //  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
00543 
00544     return buffer;
00545 }
00546 
00547 static void slice_buffer_release(slice_buffer * buf, int line)
00548 {
00549     int offset;
00550     IDWTELEM * buffer;
00551 
00552     assert(line >= 0 && line < buf->line_count);
00553     assert(buf->line[line]);
00554 
00555     offset = buf->line_width * line;
00556     buffer = buf->line[line];
00557     buf->data_stack_top++;
00558     buf->data_stack[buf->data_stack_top] = buffer;
00559     buf->line[line] = NULL;
00560 
00561 //  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
00562 }
00563 
00564 static void slice_buffer_flush(slice_buffer * buf)
00565 {
00566     int i;
00567     for (i = 0; i < buf->line_count; i++)
00568     {
00569         if (buf->line[i])
00570         {
00571 //      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
00572             slice_buffer_release(buf, i);
00573         }
00574     }
00575 }
00576 
00577 static void slice_buffer_destroy(slice_buffer * buf)
00578 {
00579     int i;
00580     slice_buffer_flush(buf);
00581 
00582     for (i = buf->data_count - 1; i >= 0; i--)
00583     {
00584         av_freep(&buf->data_stack[i]);
00585     }
00586     av_freep(&buf->data_stack);
00587     av_freep(&buf->line);
00588 }
00589 
#ifdef __sgi
// Avoid a name clash on SGI IRIX
#undef qexp
#endif
/* Shift constant derived from FRAC_BITS; its users are outside this part of the file. */
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* Table with QROOT entries; it is filled in elsewhere in the file. */
static uint8_t qexp[QROOT];
00596 
/**
 * Reflect index v into the range [0, m] (mirroring at both ends).
 *
 * @param v index, possibly outside the range
 * @param m largest valid index
 * @return the mirrored index; 0 when m is 0, because index 0 is then the
 *         only valid position
 */
static inline int mirror(int v, int m){
    /* With m == 0 the reflection below maps v to -v and back forever
     * for any v != 0, so handle the single-element range up front. */
    if(m == 0)
        return 0;

    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
00604 
00605 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
00606     int i;
00607 
00608     if(v){
00609         const int a= FFABS(v);
00610         const int e= av_log2(a);
00611 #if 1
00612         const int el= FFMIN(e, 10);
00613         put_rac(c, state+0, 0);
00614 
00615         for(i=0; i<el; i++){
00616             put_rac(c, state+1+i, 1);  //1..10
00617         }
00618         for(; i<e; i++){
00619             put_rac(c, state+1+9, 1);  //1..10
00620         }
00621         put_rac(c, state+1+FFMIN(i,9), 0);
00622 
00623         for(i=e-1; i>=el; i--){
00624             put_rac(c, state+22+9, (a>>i)&1); //22..31
00625         }
00626         for(; i>=0; i--){
00627             put_rac(c, state+22+i, (a>>i)&1); //22..31
00628         }
00629 
00630         if(is_signed)
00631             put_rac(c, state+11 + el, v < 0); //11..21
00632 #else
00633 
00634         put_rac(c, state+0, 0);
00635         if(e<=9){
00636             for(i=0; i<e; i++){
00637                 put_rac(c, state+1+i, 1);  //1..10
00638             }
00639             put_rac(c, state+1+i, 0);
00640 
00641             for(i=e-1; i>=0; i--){
00642                 put_rac(c, state+22+i, (a>>i)&1); //22..31
00643             }
00644 
00645             if(is_signed)
00646                 put_rac(c, state+11 + e, v < 0); //11..21
00647         }else{
00648             for(i=0; i<e; i++){
00649                 put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
00650             }
00651             put_rac(c, state+1+FFMIN(i,9), 0);
00652 
00653             for(i=e-1; i>=0; i--){
00654                 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
00655             }
00656 
00657             if(is_signed)
00658                 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
00659         }
00660 #endif /* 1 */
00661     }else{
00662         put_rac(c, state+0, 1);
00663     }
00664 }
00665 
00666 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
00667     if(get_rac(c, state+0))
00668         return 0;
00669     else{
00670         int i, e, a;
00671         e= 0;
00672         while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
00673             e++;
00674         }
00675 
00676         a= 1;
00677         for(i=e-1; i>=0; i--){
00678             a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
00679         }
00680 
00681         if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
00682             return -a;
00683         else
00684             return a;
00685     }
00686 }
00687 
00688 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
00689     int i;
00690     int r= log2>=0 ? 1<<log2 : 1;
00691 
00692     assert(v>=0);
00693     assert(log2>=-4);
00694 
00695     while(v >= r){
00696         put_rac(c, state+4+log2, 1);
00697         v -= r;
00698         log2++;
00699         if(log2>0) r+=r;
00700     }
00701     put_rac(c, state+4+log2, 0);
00702 
00703     for(i=log2-1; i>=0; i--){
00704         put_rac(c, state+31-i, (v>>i)&1);
00705     }
00706 }
00707 
00708 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
00709     int i;
00710     int r= log2>=0 ? 1<<log2 : 1;
00711     int v=0;
00712 
00713     assert(log2>=-4);
00714 
00715     while(get_rac(c, state+4+log2)){
00716         v+= r;
00717         log2++;
00718         if(log2>0) r+=r;
00719     }
00720 
00721     for(i=log2-1; i>=0; i--){
00722         v+= get_rac(c, state+31-i)<<i;
00723     }
00724 
00725     return v;
00726 }
00727 
00728 static av_always_inline void
00729 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00730      int dst_step, int src_step, int ref_step,
00731      int width, int mul, int add, int shift,
00732      int highpass, int inverse){
00733     const int mirror_left= !highpass;
00734     const int mirror_right= (width&1) ^ highpass;
00735     const int w= (width>>1) - 1 + (highpass & width);
00736     int i;
00737 
00738 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00739     if(mirror_left){
00740         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00741         dst += dst_step;
00742         src += src_step;
00743     }
00744 
00745     for(i=0; i<w; i++){
00746         dst[i*dst_step] =
00747             LIFT(src[i*src_step],
00748                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00749                  inverse);
00750     }
00751 
00752     if(mirror_right){
00753         dst[w*dst_step] =
00754             LIFT(src[w*src_step],
00755                  ((mul*2*ref[w*ref_step]+add)>>shift),
00756                  inverse);
00757     }
00758 }
00759 
00760 static av_always_inline void
00761 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00762          int dst_step, int src_step, int ref_step,
00763          int width, int mul, int add, int shift,
00764          int highpass, int inverse){
00765     const int mirror_left= !highpass;
00766     const int mirror_right= (width&1) ^ highpass;
00767     const int w= (width>>1) - 1 + (highpass & width);
00768     int i;
00769 
00770 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00771     if(mirror_left){
00772         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00773         dst += dst_step;
00774         src += src_step;
00775     }
00776 
00777     for(i=0; i<w; i++){
00778         dst[i*dst_step] =
00779             LIFT(src[i*src_step],
00780                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00781                  inverse);
00782     }
00783 
00784     if(mirror_right){
00785         dst[w*dst_step] =
00786             LIFT(src[w*src_step],
00787                  ((mul*2*ref[w*ref_step]+add)>>shift),
00788                  inverse);
00789     }
00790 }
00791 
00792 #ifndef liftS
00793 static av_always_inline void
00794 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00795       int dst_step, int src_step, int ref_step,
00796       int width, int mul, int add, int shift,
00797       int highpass, int inverse){
00798     const int mirror_left= !highpass;
00799     const int mirror_right= (width&1) ^ highpass;
00800     const int w= (width>>1) - 1 + (highpass & width);
00801     int i;
00802 
00803     assert(shift == 4);
00804 #define LIFTS(src, ref, inv) \
00805         ((inv) ? \
00806             (src) + (((ref) + 4*(src))>>shift): \
00807             -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00808     if(mirror_left){
00809         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00810         dst += dst_step;
00811         src += src_step;
00812     }
00813 
00814     for(i=0; i<w; i++){
00815         dst[i*dst_step] =
00816             LIFTS(src[i*src_step],
00817                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00818                   inverse);
00819     }
00820 
00821     if(mirror_right){
00822         dst[w*dst_step] =
00823             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00824     }
00825 }
00826 static av_always_inline void
00827 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00828           int dst_step, int src_step, int ref_step,
00829           int width, int mul, int add, int shift,
00830           int highpass, int inverse){
00831     const int mirror_left= !highpass;
00832     const int mirror_right= (width&1) ^ highpass;
00833     const int w= (width>>1) - 1 + (highpass & width);
00834     int i;
00835 
00836     assert(shift == 4);
00837 #define LIFTS(src, ref, inv) \
00838     ((inv) ? \
00839         (src) + (((ref) + 4*(src))>>shift): \
00840         -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00841     if(mirror_left){
00842         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00843         dst += dst_step;
00844         src += src_step;
00845     }
00846 
00847     for(i=0; i<w; i++){
00848         dst[i*dst_step] =
00849             LIFTS(src[i*src_step],
00850                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00851                   inverse);
00852     }
00853 
00854     if(mirror_right){
00855         dst[w*dst_step] =
00856             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00857     }
00858 }
00859 #endif
00860 
00861 static void horizontal_decompose53i(DWTELEM *b, int width){
00862     DWTELEM temp[width];
00863     const int width2= width>>1;
00864     int x;
00865     const int w2= (width+1)>>1;
00866 
00867     for(x=0; x<width2; x++){
00868         temp[x   ]= b[2*x    ];
00869         temp[x+w2]= b[2*x + 1];
00870     }
00871     if(width&1)
00872         temp[x   ]= b[2*x    ];
00873 #if 0
00874     {
00875     int A1,A2,A3,A4;
00876     A2= temp[1       ];
00877     A4= temp[0       ];
00878     A1= temp[0+width2];
00879     A1 -= (A2 + A4)>>1;
00880     A4 += (A1 + 1)>>1;
00881     b[0+width2] = A1;
00882     b[0       ] = A4;
00883     for(x=1; x+1<width2; x+=2){
00884         A3= temp[x+width2];
00885         A4= temp[x+1     ];
00886         A3 -= (A2 + A4)>>1;
00887         A2 += (A1 + A3 + 2)>>2;
00888         b[x+width2] = A3;
00889         b[x       ] = A2;
00890 
00891         A1= temp[x+1+width2];
00892         A2= temp[x+2       ];
00893         A1 -= (A2 + A4)>>1;
00894         A4 += (A1 + A3 + 2)>>2;
00895         b[x+1+width2] = A1;
00896         b[x+1       ] = A4;
00897     }
00898     A3= temp[width-1];
00899     A3 -= A2;
00900     A2 += (A1 + A3 + 2)>>2;
00901     b[width -1] = A3;
00902     b[width2-1] = A2;
00903     }
00904 #else
00905     lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
00906     lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
00907 #endif
00908 }
00909 
00910 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00911     int i;
00912 
00913     for(i=0; i<width; i++){
00914         b1[i] -= (b0[i] + b2[i])>>1;
00915     }
00916 }
00917 
00918 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00919     int i;
00920 
00921     for(i=0; i<width; i++){
00922         b1[i] += (b0[i] + b2[i] + 2)>>2;
00923     }
00924 }
00925 
00926 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
00927     int y;
00928     DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
00929     DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
00930 
00931     for(y=-2; y<height; y+=2){
00932         DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
00933         DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
00934 
00935 {START_TIMER
00936         if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
00937         if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
00938 STOP_TIMER("horizontal_decompose53i")}
00939 
00940 {START_TIMER
00941         if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
00942         if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
00943 STOP_TIMER("vertical_decompose53i*")}
00944 
00945         b0=b2;
00946         b1=b3;
00947     }
00948 }
00949 
00950 static void horizontal_decompose97i(DWTELEM *b, int width){
00951     DWTELEM temp[width];
00952     const int w2= (width+1)>>1;
00953 
00954     lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
00955     liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
00956     lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
00957     lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
00958 }
00959 
00960 
00961 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00962     int i;
00963 
00964     for(i=0; i<width; i++){
00965         b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
00966     }
00967 }
00968 
00969 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00970     int i;
00971 
00972     for(i=0; i<width; i++){
00973         b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
00974     }
00975 }
00976 
00977 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00978     int i;
00979 
00980     for(i=0; i<width; i++){
00981 #ifdef liftS
00982         b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
00983 #else
00984         b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
00985 #endif
00986     }
00987 }
00988 
00989 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00990     int i;
00991 
00992     for(i=0; i<width; i++){
00993         b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
00994     }
00995 }
00996 
00997 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
00998     int y;
00999     DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
01000     DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
01001     DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
01002     DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
01003 
01004     for(y=-4; y<height; y+=2){
01005         DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
01006         DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
01007 
01008 {START_TIMER
01009         if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
01010         if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
01011 if(width>400){
01012 STOP_TIMER("horizontal_decompose97i")
01013 }}
01014 
01015 {START_TIMER
01016         if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
01017         if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
01018         if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
01019         if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
01020 
01021 if(width>400){
01022 STOP_TIMER("vertical_decompose97i")
01023 }}
01024 
01025         b0=b2;
01026         b1=b3;
01027         b2=b4;
01028         b3=b5;
01029     }
01030 }
01031 
01032 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01033     int level;
01034 
01035     for(level=0; level<decomposition_count; level++){
01036         switch(type){
01037         case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
01038         case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
01039         }
01040     }
01041 }
01042 
01043 static void horizontal_compose53i(IDWTELEM *b, int width){
01044     IDWTELEM temp[width];
01045     const int width2= width>>1;
01046     const int w2= (width+1)>>1;
01047     int x;
01048 
01049 #if 0
01050     int A1,A2,A3,A4;
01051     A2= temp[1       ];
01052     A4= temp[0       ];
01053     A1= temp[0+width2];
01054     A1 -= (A2 + A4)>>1;
01055     A4 += (A1 + 1)>>1;
01056     b[0+width2] = A1;
01057     b[0       ] = A4;
01058     for(x=1; x+1<width2; x+=2){
01059         A3= temp[x+width2];
01060         A4= temp[x+1     ];
01061         A3 -= (A2 + A4)>>1;
01062         A2 += (A1 + A3 + 2)>>2;
01063         b[x+width2] = A3;
01064         b[x       ] = A2;
01065 
01066         A1= temp[x+1+width2];
01067         A2= temp[x+2       ];
01068         A1 -= (A2 + A4)>>1;
01069         A4 += (A1 + A3 + 2)>>2;
01070         b[x+1+width2] = A1;
01071         b[x+1       ] = A4;
01072     }
01073     A3= temp[width-1];
01074     A3 -= A2;
01075     A2 += (A1 + A3 + 2)>>2;
01076     b[width -1] = A3;
01077     b[width2-1] = A2;
01078 #else
01079     inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
01080     inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
01081 #endif /* 0 */
01082     for(x=0; x<width2; x++){
01083         b[2*x    ]= temp[x   ];
01084         b[2*x + 1]= temp[x+w2];
01085     }
01086     if(width&1)
01087         b[2*x    ]= temp[x   ];
01088 }
01089 
01090 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01091     int i;
01092 
01093     for(i=0; i<width; i++){
01094         b1[i] += (b0[i] + b2[i])>>1;
01095     }
01096 }
01097 
01098 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01099     int i;
01100 
01101     for(i=0; i<width; i++){
01102         b1[i] -= (b0[i] + b2[i] + 2)>>2;
01103     }
01104 }
01105 
01106 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
01107     cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
01108     cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
01109     cs->y = -1;
01110 }
01111 
01112 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
01113     cs->b0 = buffer + mirror(-1-1, height-1)*stride;
01114     cs->b1 = buffer + mirror(-1  , height-1)*stride;
01115     cs->y = -1;
01116 }
01117 
01118 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
01119     int y= cs->y;
01120 
01121     IDWTELEM *b0= cs->b0;
01122     IDWTELEM *b1= cs->b1;
01123     IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
01124     IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
01125 
01126 {START_TIMER
01127         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01128         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01129 STOP_TIMER("vertical_compose53i*")}
01130 
01131 {START_TIMER
01132         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01133         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01134 STOP_TIMER("horizontal_compose53i")}
01135 
01136     cs->b0 = b2;
01137     cs->b1 = b3;
01138     cs->y += 2;
01139 }
01140 
01141 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
01142     int y= cs->y;
01143     IDWTELEM *b0= cs->b0;
01144     IDWTELEM *b1= cs->b1;
01145     IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
01146     IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
01147 
01148 {START_TIMER
01149         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01150         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01151 STOP_TIMER("vertical_compose53i*")}
01152 
01153 {START_TIMER
01154         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01155         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01156 STOP_TIMER("horizontal_compose53i")}
01157 
01158     cs->b0 = b2;
01159     cs->b1 = b3;
01160     cs->y += 2;
01161 }
01162 
01163 
01164 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
01165     IDWTELEM temp[width];
01166     const int w2= (width+1)>>1;
01167 
01168     inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
01169     inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
01170     inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
01171     inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
01172 }
01173 
01174 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01175     int i;
01176 
01177     for(i=0; i<width; i++){
01178         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01179     }
01180 }
01181 
01182 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01183     int i;
01184 
01185     for(i=0; i<width; i++){
01186         b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
01187     }
01188 }
01189 
01190 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01191     int i;
01192 
01193     for(i=0; i<width; i++){
01194 #ifdef liftS
01195         b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
01196 #else
01197         b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
01198 #endif
01199     }
01200 }
01201 
01202 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01203     int i;
01204 
01205     for(i=0; i<width; i++){
01206         b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
01207     }
01208 }
01209 
01210 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
01211     int i;
01212 
01213     for(i=0; i<width; i++){
01214         b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
01215         b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
01216 #ifdef liftS
01217         b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
01218 #else
01219         b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
01220 #endif
01221         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01222     }
01223 }
01224 
01225 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
01226     cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
01227     cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
01228     cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
01229     cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
01230     cs->y = -3;
01231 }
01232 
01233 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
01234     cs->b0 = buffer + mirror(-3-1, height-1)*stride;
01235     cs->b1 = buffer + mirror(-3  , height-1)*stride;
01236     cs->b2 = buffer + mirror(-3+1, height-1)*stride;
01237     cs->b3 = buffer + mirror(-3+2, height-1)*stride;
01238     cs->y = -3;
01239 }
01240 
01241 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
01242     int y = cs->y;
01243 
01244     IDWTELEM *b0= cs->b0;
01245     IDWTELEM *b1= cs->b1;
01246     IDWTELEM *b2= cs->b2;
01247     IDWTELEM *b3= cs->b3;
01248     IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
01249     IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
01250 
01251 {START_TIMER
01252     if(y>0 && y+4<height){
01253         dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
01254     }else{
01255         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01256         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01257         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01258         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01259     }
01260 if(width>400){
01261 STOP_TIMER("vertical_compose97i")}}
01262 
01263 {START_TIMER
01264         if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
01265         if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
01266 if(width>400 && y+0<(unsigned)height){
01267 STOP_TIMER("horizontal_compose97i")}}
01268 
01269     cs->b0=b2;
01270     cs->b1=b3;
01271     cs->b2=b4;
01272     cs->b3=b5;
01273     cs->y += 2;
01274 }
01275 
01276 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
01277     int y = cs->y;
01278     IDWTELEM *b0= cs->b0;
01279     IDWTELEM *b1= cs->b1;
01280     IDWTELEM *b2= cs->b2;
01281     IDWTELEM *b3= cs->b3;
01282     IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
01283     IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
01284 
01285 {START_TIMER
01286         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01287         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01288         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01289         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01290 if(width>400){
01291 STOP_TIMER("vertical_compose97i")}}
01292 
01293 {START_TIMER
01294         if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
01295         if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
01296 if(width>400 && b0 <= b2){
01297 STOP_TIMER("horizontal_compose97i")}}
01298 
01299     cs->b0=b2;
01300     cs->b1=b3;
01301     cs->b2=b4;
01302     cs->b3=b5;
01303     cs->y += 2;
01304 }
01305 
01306 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
01307     int level;
01308     for(level=decomposition_count-1; level>=0; level--){
01309         switch(type){
01310         case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01311         case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01312         }
01313     }
01314 }
01315 
01316 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01317     int level;
01318     for(level=decomposition_count-1; level>=0; level--){
01319         switch(type){
01320         case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
01321         case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
01322         }
01323     }
01324 }
01325 
01326 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
01327     const int support = type==1 ? 3 : 5;
01328     int level;
01329     if(type==2) return;
01330 
01331     for(level=decomposition_count-1; level>=0; level--){
01332         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01333             switch(type){
01334             case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01335                     break;
01336             case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01337                     break;
01338             }
01339         }
01340     }
01341 }
01342 
01343 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
01344     const int support = type==1 ? 3 : 5;
01345     int level;
01346     if(type==2) return;
01347 
01348     for(level=decomposition_count-1; level>=0; level--){
01349         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01350             switch(type){
01351             case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01352                     break;
01353             case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01354                     break;
01355             }
01356         }
01357     }
01358 }
01359 
01360 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01361         dwt_compose_t cs[MAX_DECOMPOSITIONS];
01362         int y;
01363         ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
01364         for(y=0; y<height; y+=4)
01365             ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
01366 }
01367 
01368 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01369     const int w= b->width;
01370     const int h= b->height;
01371     int x, y;
01372 
01373     if(1){
01374         int run=0;
01375         int runs[w*h];
01376         int run_index=0;
01377         int max_index;
01378 
01379         for(y=0; y<h; y++){
01380             for(x=0; x<w; x++){
01381                 int v, p=0;
01382                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
01383                 v= src[x + y*stride];
01384 
01385                 if(y){
01386                     t= src[x + (y-1)*stride];
01387                     if(x){
01388                         lt= src[x - 1 + (y-1)*stride];
01389                     }
01390                     if(x + 1 < w){
01391                         rt= src[x + 1 + (y-1)*stride];
01392                     }
01393                 }
01394                 if(x){
01395                     l= src[x - 1 + y*stride];
01396                     /*if(x > 1){
01397                         if(orientation==1) ll= src[y + (x-2)*stride];
01398                         else               ll= src[x - 2 + y*stride];
01399                     }*/
01400                 }
01401                 if(parent){
01402                     int px= x>>1;
01403                     int py= y>>1;
01404                     if(px<b->parent->width && py<b->parent->height)
01405                         p= parent[px + py*2*stride];
01406                 }
01407                 if(!(/*ll|*/l|lt|t|rt|p)){
01408                     if(v){
01409                         runs[run_index++]= run;
01410                         run=0;
01411                     }else{
01412                         run++;
01413                     }
01414                 }
01415             }
01416         }
01417         max_index= run_index;
01418         runs[run_index++]= run;
01419         run_index=0;
01420         run= runs[run_index++];
01421 
01422         put_symbol2(&s->c, b->state[30], max_index, 0);
01423         if(run_index <= max_index)
01424             put_symbol2(&s->c, b->state[1], run, 3);
01425 
01426         for(y=0; y<h; y++){
01427             if(s->c.bytestream_end - s->c.bytestream < w*40){
01428                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
01429                 return -1;
01430             }
01431             for(x=0; x<w; x++){
01432                 int v, p=0;
01433                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
01434                 v= src[x + y*stride];
01435 
01436                 if(y){
01437                     t= src[x + (y-1)*stride];
01438                     if(x){
01439                         lt= src[x - 1 + (y-1)*stride];
01440                     }
01441                     if(x + 1 < w){
01442                         rt= src[x + 1 + (y-1)*stride];
01443                     }
01444                 }
01445                 if(x){
01446                     l= src[x - 1 + y*stride];
01447                     /*if(x > 1){
01448                         if(orientation==1) ll= src[y + (x-2)*stride];
01449                         else               ll= src[x - 2 + y*stride];
01450                     }*/
01451                 }
01452                 if(parent){
01453                     int px= x>>1;
01454                     int py= y>>1;
01455                     if(px<b->parent->width && py<b->parent->height)
01456                         p= parent[px + py*2*stride];
01457                 }
01458                 if(/*ll|*/l|lt|t|rt|p){
01459                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01460 
01461                     put_rac(&s->c, &b->state[0][context], !!v);
01462                 }else{
01463                     if(!run){
01464                         run= runs[run_index++];
01465 
01466                         if(run_index <= max_index)
01467                             put_symbol2(&s->c, b->state[1], run, 3);
01468                         assert(v);
01469                     }else{
01470                         run--;
01471                         assert(!v);
01472                     }
01473                 }
01474                 if(v){
01475                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01476                     int l2= 2*FFABS(l) + (l<0);
01477                     int t2= 2*FFABS(t) + (t<0);
01478 
01479                     put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
01480                     put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
01481                 }
01482             }
01483         }
01484     }
01485     return 0;
01486 }
01487 
01488 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01489 //    encode_subband_qtree(s, b, src, parent, stride, orientation);
01490 //    encode_subband_z0run(s, b, src, parent, stride, orientation);
01491     return encode_subband_c0run(s, b, src, parent, stride, orientation);
01492 //    encode_subband_dzr(s, b, src, parent, stride, orientation);
01493 }
01494 
/**
 * Decode the coefficients of one subband from the range coder s->c into the
 * sparse list b->x_coeff.
 *
 * Only non-zero coefficients are stored. Each entry holds the column (x) and
 * the value packed as 2*magnitude + sign bit. Every row is terminated by an
 * entry whose x is w+1, and one more such marker is appended after the last
 * row.
 *
 * @param s           codec context, supplies the range coder
 * @param b           subband being decoded; its x_coeff list is written and
 *                    its state[] contexts are adapted
 * @param parent      subband whose already decoded x_coeff list supplies the
 *                    "p" neighbour (indexed with x>>1, advanced every second
 *                    row), or NULL
 * @param orientation not referenced by this function
 */
01495 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
01496     const int w= b->width;
01497     const int h= b->height;
01498     int x,y;
01499 
01500     if(1){
01501         int run, runs;
              /* xc:        write cursor into this band's list
               * prev_xc:   read cursor walking the previous row's entries
               * prev2_xc:  start of the row currently being written (becomes
               *            prev_xc once the row is finished)
               * parent_xc: read cursor in the parent's list
               * prev_parent_xc: start of the current parent row, used to
               *            rewind because one parent row serves two rows here */
01502         x_and_coeff *xc= b->x_coeff;
01503         x_and_coeff *prev_xc= NULL;
01504         x_and_coeff *prev2_xc= xc;
01505         x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
01506         x_and_coeff *prev_parent_xc= parent_xc;
01507 
              /* Number of coded runs, then the first run length. Once all
               * runs are consumed the run is treated as unbounded. */
01508         runs= get_symbol2(&s->c, b->state[30], 0);
01509         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01510         else           run= INT_MAX;
01511 
01512         for(y=0; y<h; y++){
01513             int v=0;
01514             int lt=0, t=0, rt=0;
01515 
                  /* Prime rt with the previous row's coefficient at column 0 so
                   * that the shift below turns it into "t" for x == 0. */
01516             if(y && prev_xc->x == 0){
01517                 rt= prev_xc->coeff;
01518             }
01519             for(x=0; x<w; x++){
01520                 int p=0;
                      /* left neighbour = value decoded in the previous iteration */
01521                 const int l= v;
01522 
                      /* slide the top-row window one column to the right */
01523                 lt= t; t= rt;
01524 
01525                 if(y){
01526                     if(prev_xc->x <= x)
01527                         prev_xc++;
01528                     if(prev_xc->x == x + 1)
01529                         rt= prev_xc->coeff;
01530                     else
01531                         rt=0;
01532                 }
01533                 if(parent_xc){
01534                     if(x>>1 > parent_xc->x){
01535                         parent_xc++;
01536                     }
01537                     if(x>>1 == parent_xc->x){
01538                         p= parent_xc->coeff;
01539                     }
01540                 }
01541                 if(/*ll|*/l|lt|t|rt|p){
                          /* Neighbours are in packed form, so >>1 drops the sign
                           * bit and yields the magnitude; (t&~1) equals
                           * 2*(t>>1), i.e. the top neighbour is weighted twice. */
01542                     int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
01543 
                          /* first a "non-zero?" flag, then magnitude-1 and sign */
01544                     v=get_rac(&s->c, &b->state[0][context]);
01545                     if(v){
01546                         v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
01547                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
01548 
01549                         xc->x=x;
01550                         (xc++)->coeff= v;
01551                     }
01552                 }else{
                          /* All neighbours are zero: zero/non-zero is decided by
                           * the run lengths instead of a per-coefficient flag. */
01553                     if(!run){
                              /* run exhausted: this position is non-zero; fetch
                               * the next run, then magnitude and sign with the
                               * context-0 states */
01554                         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01555                         else           run= INT_MAX;
01556                         v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
01557                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
01558 
01559                         xc->x=x;
01560                         (xc++)->coeff= v;
01561                     }else{
01562                         int max_run;
01563                         run--;
01564                         v=0;
01565 
                              /* Jump ahead over further positions of the run in
                               * one step. The jump is capped by the end of the
                               * row (first row), by the next entry of the
                               * previous row and by the next parent entry.
                               * NOTE(review): presumably the caps stop the jump
                               * before any position with a non-zero neighbour;
                               * confirm. */
01566                         if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
01567                         else  max_run= FFMIN(run, w-x-1);
01568                         if(parent_xc)
01569                             max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
01570                         x+= max_run;
01571                         run-= max_run;
01572                     }
01573                 }
01574             }
01575             (xc++)->x= w+1; //end marker
                  /* the row just written becomes the "previous row" */
01576             prev_xc= prev2_xc;
01577             prev2_xc= xc;
01578 
01579             if(parent_xc){
01580                 if(y&1){
                          /* after an odd row: step past the parent's row end
                           * marker to the start of the next parent row */
01581                     while(parent_xc->x != parent->width+1)
01582                         parent_xc++;
01583                     parent_xc++;
01584                     prev_parent_xc= parent_xc;
01585                 }else{
                          /* after an even row: the next row uses the same parent
                           * row again, so rewind to its start */
01586                     parent_xc= prev_parent_xc;
01587                 }
01588             }
01589         }
01590 
01591         (xc++)->x= w+1; //end marker
01592     }
01593 }
01594 
01595 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
01596     const int w= b->width;
01597     int y;
01598     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
01599     int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
01600     int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
01601     int new_index = 0;
01602 
01603     START_TIMER
01604 
01605     if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
01606         qadd= 0;
01607         qmul= 1<<QEXPSHIFT;
01608     }
01609 
01610     /* If we are on the second or later slice, restore our index. */
01611     if (start_y != 0)
01612         new_index = save_state[0];
01613 
01614 
01615     for(y=start_y; y<h; y++){
01616         int x = 0;
01617         int v;
01618         IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
01619         memset(line, 0, b->width*sizeof(IDWTELEM));
01620         v = b->x_coeff[new_index].coeff;
01621         x = b->x_coeff[new_index++].x;
01622         while(x < w)
01623         {
01624             register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
01625             register int u= -(v&1);
01626             line[x] = (t^u) - u;
01627 
01628             v = b->x_coeff[new_index].coeff;
01629             x = b->x_coeff[new_index++].x;
01630         }
01631     }
01632     if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
01633         STOP_TIMER("decode_subband")
01634     }
01635 
01636     /* Save our variables for the next slice. */
01637     save_state[0] = new_index;
01638 
01639     return;
01640 }
01641 
01642 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
01643     int plane_index, level, orientation;
01644 
01645     for(plane_index=0; plane_index<3; plane_index++){
01646         for(level=0; level<MAX_DECOMPOSITIONS; level++){
01647             for(orientation=level ? 1:0; orientation<4; orientation++){
01648                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
01649             }
01650         }
01651     }
01652     memset(s->header_state, MID_STATE, sizeof(s->header_state));
01653     memset(s->block_state, MID_STATE, sizeof(s->block_state));
01654 }
01655 
01656 static int alloc_blocks(SnowContext *s){
01657     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
01658     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
01659 
01660     s->b_width = w;
01661     s->b_height= h;
01662 
01663     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
01664     return 0;
01665 }
01666 
01667 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
01668     uint8_t *bytestream= d->bytestream;
01669     uint8_t *bytestream_start= d->bytestream_start;
01670     *d= *s;
01671     d->bytestream= bytestream;
01672     d->bytestream_start= bytestream_start;
01673 }
01674 
01675 //near copy & paste from dsputil, FIXME
/**
 * Sum all samples of a w x w square.
 *
 * @param pix       top-left sample of the square
 * @param line_size distance between the starts of two consecutive rows
 * @param w         edge length of the square; nothing is read for w <= 0
 * @return the sum of the w*w samples
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
01690 
01691 //near copy & paste from dsputil, FIXME
01692 static int pix_norm1(uint8_t * pix, int line_size, int w)
01693 {
01694     int s, i, j;
01695     uint32_t *sq = ff_squareTbl + 256;
01696 
01697     s = 0;
01698     for (i = 0; i < w; i++) {
01699         for (j = 0; j < w; j ++) {
01700             s += sq[pix[0]];
01701             pix ++;
01702         }
01703         pix += line_size - w;
01704     }
01705     return s;
01706 }
01707 
01708 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
01709     const int w= s->b_width << s->block_max_depth;
01710     const int rem_depth= s->block_max_depth - level;
01711     const int index= (x + y*w) << rem_depth;
01712     const int block_w= 1<<rem_depth;
01713     BlockNode block;
01714     int i,j;
01715 
01716     block.color[0]= l;
01717     block.color[1]= cb;
01718     block.color[2]= cr;
01719     block.mx= mx;
01720     block.my= my;
01721     block.ref= ref;
01722     block.type= type;
01723     block.level= level;
01724 
01725     for(j=0; j<block_w; j++){
01726         for(i=0; i<block_w; i++){
01727             s->block[index + i + j*w]= block;
01728         }
01729     }
01730 }
01731 
01732 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
01733     const int offset[3]= {
01734           y*c->  stride + x,
01735         ((y*c->uvstride + x)>>1),
01736         ((y*c->uvstride + x)>>1),
01737     };
01738     int i;
01739     for(i=0; i<3; i++){
01740         c->src[0][i]= src [i];
01741         c->ref[0][i]= ref [i] + offset[i];
01742     }
01743     assert(!ref_index);
01744 }
01745 
01746 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
01747                            const BlockNode *left, const BlockNode *top, const BlockNode *tr){
01748     if(s->ref_frames == 1){
01749         *mx = mid_pred(left->mx, top->mx, tr->mx);
01750         *my = mid_pred(left->my, top->my, tr->my);
01751     }else{
01752         const int *scale = scale_mv_ref[ref];
01753         *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
01754                        (top ->mx * scale[top ->ref] + 128) >>8,
01755                        (tr  ->mx * scale[tr  ->ref] + 128) >>8);
01756         *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
01757                        (top ->my * scale[top ->ref] + 128) >>8,
01758                        (tr  ->my * scale[tr  ->ref] + 128) >>8);
01759     }
01760 }
01761 
01762 //FIXME copy&paste
01763 #define P_LEFT P[1]
01764 #define P_TOP P[2]
01765 #define P_TOPRIGHT P[3]
01766 #define P_MEDIAN P[4]
01767 #define P_MV1 P[9]
01768 #define FLAG_QPEL   1 //must be 1
01769 
/**
 * Encoder mode decision for one node of the block quadtree.
 *
 * For the block at (x, y) of quadtree level `level` three candidates are
 * priced:
 *  - inter: the best motion vector found over all s->ref_frames references,
 *  - intra: one mean value per plane,
 *  - split: (only above block_max_depth) four children, handled recursively.
 * Inter and intra are trial-encoded with copies of the range coder into
 * scratch buffers; the split candidate is written to the real stream. The
 * candidate with the lowest score wins: its bytes and context states end up
 * in s->c / s->block_state and its parameters are stored with set_blocks().
 *
 * On keyframes nothing is searched or written: the block is stored as intra
 * with the left neighbour's colours and 0 is returned.
 *
 * @return the score of the chosen candidate (0 on keyframes)
 */
01770 static int encode_q_branch(SnowContext *s, int level, int x, int y){
          /* scratch output and context copies for the inter (p_) and intra (i_)
           * trial encodes */
01771     uint8_t p_buffer[1024];
01772     uint8_t i_buffer[1024];
01773     uint8_t p_state[sizeof(s->block_state)];
01774     uint8_t i_state[sizeof(s->block_state)];
01775     RangeCoder pc, ic;
          /* position in the real stream before this block; the winning trial
           * is copied back to this position */
01776     uint8_t *pbbak= s->c.bytestream;
01777     uint8_t *pbbak_start= s->c.bytestream_start;
01778     int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
01779     const int w= s->b_width  << s->block_max_depth;
01780     const int h= s->b_height << s->block_max_depth;
01781     const int rem_depth= s->block_max_depth - level;
01782     const int index= (x + y*w) << rem_depth;
          /* edge length of this block in luma samples */
01783     const int block_w= 1<<(LOG2_MB_SIZE - level);
01784     int trx= (x+1)<<rem_depth;
01785     int try= (y+1)<<rem_depth;
          /* neighbours; null_block stands in where the picture edge is reached */
01786     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
01787     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
01788     const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
01789     const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
01790     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
01791     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
          /* colour predictors = colours of the left neighbour */
01792     int pl = left->color[0];
01793     int pcb= left->color[1];
01794     int pcr= left->color[2];
01795     int pmx, pmy;
01796     int mx=0, my=0;
01797     int l,cr,cb;
01798     const int stride= s->current_picture.linesize[0];
01799     const int uvstride= s->current_picture.linesize[1];
          /* the block's samples in the input picture, one pointer per plane */
01800     uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
01801                                 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
01802                                 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
01803     int P[10][2];
01804     int16_t last_mv[3][2];
01805     int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
01806     const int shift= 1+qpel;
01807     MotionEstContext *c= &s->m.me;
          /* context selectors for the entropy coder, derived from the neighbours */
01808     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
01809     int mx_context= av_log2(2*FFABS(left->mx - top->mx));
01810     int my_context= av_log2(2*FFABS(left->my - top->my));
01811     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
01812     int ref, best_ref, ref_score, ref_mx, ref_my;
01813 
01814     assert(sizeof(s->block_state) >= 256);
01815     if(s->keyframe){
01816         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
01817         return 0;
01818     }
01819 
01820 //    clip predictors / edge ?
01821 
          /* motion vector predictors handed to the motion search */
01822     P_LEFT[0]= left->mx;
01823     P_LEFT[1]= left->my;
01824     P_TOP [0]= top->mx;
01825     P_TOP [1]= top->my;
01826     P_TOPRIGHT[0]= tr->mx;
01827     P_TOPRIGHT[1]= tr->my;
01828 
          /* vectors currently stored for this block and its right and bottom
           * neighbours.
           * NOTE(review): presumably left over from the previous frame or
           * pass; confirm. */
01829     last_mv[0][0]= s->block[index].mx;
01830     last_mv[0][1]= s->block[index].my;
01831     last_mv[1][0]= right->mx;
01832     last_mv[1][1]= right->my;
01833     last_mv[2][0]= bottom->mx;
01834     last_mv[2][1]= bottom->my;
01835 
01836     s->m.mb_stride=2;
01837     s->m.mb_x=
01838     s->m.mb_y= 0;
01839     c->skip= 0;
01840 
01841     assert(c->  stride ==   stride);
01842     assert(c->uvstride == uvstride);
01843 
01844     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
01845     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
01846     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
01847     c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
01848 
          /* search window relative to the block position: the block grid area
           * extended by 16-2 samples on every side */
01849     c->xmin = - x*block_w - 16+2;
01850     c->ymin = - y*block_w - 16+2;
01851     c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01852     c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01853 
          /* clip the predictors to the search window (window limits are in
           * full samples, predictors are scaled by 1<<shift) */
01854     if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
01855     if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
01856     if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
01857     if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
01858     if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
01859     if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
01860     if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
01861 
01862     P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
01863     P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
01864 
          /* in the top row only the left predictor is used, elsewhere the median */
01865     if (!y) {
01866         c->pred_x= P_LEFT[0];
01867         c->pred_y= P_LEFT[1];
01868     } else {
01869         c->pred_x = P_MEDIAN[0];
01870         c->pred_y = P_MEDIAN[1];
01871     }
01872 
          /* motion search in every reference frame, keeping the best score */
01873     score= INT_MAX;
01874     best_ref= 0;
01875     for(ref=0; ref<s->ref_frames; ref++){
01876         init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
01877 
01878         ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
01879                                          (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
01880 
01881         assert(ref_mx >= c->xmin);
01882         assert(ref_mx <= c->xmax);
01883         assert(ref_my >= c->ymin);
01884         assert(ref_my <= c->ymax);
01885 
01886         ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
01887         ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
              /* penalty that grows with the reference index (none for ref 0) */
01888         ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
              /* remember the per-reference result when the tables are allocated */
01889         if(s->ref_mvs[ref]){
01890             s->ref_mvs[ref][index][0]= ref_mx;
01891             s->ref_mvs[ref][index][1]= ref_my;
01892             s->ref_scores[ref][index]= ref_score;
01893         }
01894         if(score > ref_score){
01895             score= ref_score;
01896             best_ref= ref;
01897             mx= ref_mx;
01898             my= ref_my;
01899         }
01900     }
01901     //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
01902 
01903   //  subpel search
          /* Reference bit count for pricing the trials. The trial coders start
           * at the beginning of their scratch buffers, hence the bytes already
           * written to the real stream are subtracted here. */
01904     base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
          /* inter trial: "not split" flag (only above max depth), type flag 0,
           * reference index (only with several references), vector difference */
01905     pc= s->c;
01906     pc.bytestream_start=
01907     pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
01908     memcpy(p_state, s->block_state, sizeof(s->block_state));
01909 
01910     if(level!=s->block_max_depth)
01911         put_rac(&pc, &p_state[4 + s_context], 1);
01912     put_rac(&pc, &p_state[1 + left->type + top->type], 0);
01913     if(s->ref_frames > 1)
01914         put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
01915     pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
01916     put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
01917     put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
01918     p_len= pc.bytestream - pc.bytestream_start;
01919     score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
01920 
          /* intra candidate: l is the rounded mean of the luma block; iscore is
           * pix_norm1 - 2*l*sum + l*l*N, which is the squared error against l
           * if pix_norm1() returns the sum of squares */
01921     block_s= block_w*block_w;
01922     sum = pix_sum(current_data[0], stride, block_w);
01923     l= (sum + block_s/2)/block_s;
01924     iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
01925 
          /* chroma means; the chroma error is not added to iscore (the lines
           * doing so are commented out) */
01926     block_s= block_w*block_w>>2;
01927     sum = pix_sum(current_data[1], uvstride, block_w>>1);
01928     cb= (sum + block_s/2)/block_s;
01929 //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
01930     sum = pix_sum(current_data[2], uvstride, block_w>>1);
01931     cr= (sum + block_s/2)/block_s;
01932 //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
01933 
          /* intra trial: "not split" flag, type flag 1, colour differences
           * against the left neighbour */
01934     ic= s->c;
01935     ic.bytestream_start=
01936     ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
01937     memcpy(i_state, s->block_state, sizeof(s->block_state));
01938     if(level!=s->block_max_depth)
01939         put_rac(&ic, &i_state[4 + s_context], 1);
01940     put_rac(&ic, &i_state[1 + left->type + top->type], 1);
01941     put_symbol(&ic, &i_state[32],  l-pl , 1);
01942     put_symbol(&ic, &i_state[64], cb-pcb, 1);
01943     put_symbol(&ic, &i_state[96], cr-pcr, 1);
01944     i_len= ic.bytestream - ic.bytestream_start;
01945     iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
01946 
01947 //    assert(score==256*256*256*64-1);
01948     assert(iscore < 255*255*256 + s->lambda2*10);
01949     assert(iscore >= 0);
01950     assert(l>=0 && l<=255);
01951     assert(pl>=0 && pl<=255);
01952 
          /* top-level blocks also feed the scene change statistic */
01953     if(level==0){
01954         int varc= iscore >> 8;
01955         int vard= score >> 8;
01956         if (vard <= 64 || vard < varc)
01957             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
01958         else
01959             c->scene_change_score+= s->m.qscale;
01960     }
01961 
          /* split candidate: written straight to the real stream ("split" flag
           * 0 followed by the four children); kept only if it beats both the
           * inter and the intra score */
01962     if(level!=s->block_max_depth){
01963         put_rac(&s->c, &s->block_state[4 + s_context], 0);
01964         score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
01965         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
01966         score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
01967         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
01968         score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
01969 
01970         if(score2 < score && score2 < iscore)
01971             return score2;
01972     }
01973 
          /* Unsplit block wins: copy the winning trial's bytes to the saved
           * stream position (overwriting anything the split attempt wrote),
           * adopt its coder and context states, and store its parameters. */
01974     if(iscore < score){
              /* an intra block stores the predicted vector with reference 0 */
01975         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
01976         memcpy(pbbak, i_buffer, i_len);
01977         s->c= ic;
01978         s->c.bytestream_start= pbbak_start;
01979         s->c.bytestream= pbbak + i_len;
01980         set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
01981         memcpy(s->block_state, i_state, sizeof(s->block_state));
01982         return iscore;
01983     }else{
              /* an inter block keeps the left neighbour's colours */
01984         memcpy(pbbak, p_buffer, p_len);
01985         s->c= pc;
01986         s->c.bytestream_start= pbbak_start;
01987         s->c.bytestream= pbbak + p_len;
01988         set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
01989         memcpy(s->block_state, p_state, sizeof(s->block_state));
01990         return score;
01991     }
01992 }
01993 
01994 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
01995     if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
01996         return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
01997     }else{
01998         return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
01999     }
02000 }
02001 
02002 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
02003     const int w= s->b_width  << s->block_max_depth;
02004     const int rem_depth= s->block_max_depth - level;
02005     const int index= (x + y*w) << rem_depth;
02006     int trx= (x+1)<<rem_depth;
02007     BlockNode *b= &s->block[index];
02008     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
02009     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
02010     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
02011     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
02012     int pl = left->color[0];
02013     int pcb= left->color[1];
02014     int pcr= left->color[2];
02015     int pmx, pmy;
02016     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02017     int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
02018     int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
02019     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02020 
02021     if(s->keyframe){
02022         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
02023         return;
02024     }
02025 
02026     if(level!=s->block_max_depth){
02027         if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
02028             put_rac(&s->c, &s->block_state[4 + s_context], 1);
02029         }else{
02030             put_rac(&s->c, &s->block_state[4 + s_context], 0);
02031             encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
02032             encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
02033             encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
02034             encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
02035             return;
02036         }
02037     }
02038     if(b->type & BLOCK_INTRA){
02039         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
02040         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
02041         put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
02042         put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
02043         put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
02044         set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
02045     }else{
02046         pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
02047         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
02048         if(s->ref_frames > 1)
02049             put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
02050         put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
02051         put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
02052         set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
02053     }
02054 }
02055 
02056 static void decode_q_branch(SnowContext *s, int level, int x, int y){
02057     const int w= s->b_width << s->block_max_depth;
02058     const int rem_depth= s->block_max_depth - level;
02059     const int index= (x + y*w) << rem_depth;
02060     int trx= (x+1)<<rem_depth;
02061     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
02062     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
02063     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
02064     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
02065     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02066 
02067     if(s->keyframe){
02068         set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
02069         return;
02070     }
02071 
02072     if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
02073         int type, mx, my;
02074         int l = left->color[0];
02075         int cb= left->color[1];
02076         int cr= left->color[2];
02077         int ref = 0;
02078         int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02079         int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
02080         int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
02081 
02082         type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
02083 
02084         if(type){
02085             pred_mv(s, &mx, &my, 0, left, top, tr);
02086             l += get_symbol(&s->c, &s->block_state[32], 1);
02087             cb+= get_symbol(&s->c, &s->block_state[64], 1);
02088             cr+= get_symbol(&s->c, &s->block_state[96], 1);
02089         }else{
02090             if(s->ref_frames > 1)
02091                 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
02092             pred_mv(s, &mx, &my, ref, left, top, tr);
02093             mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
02094             my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
02095         }
02096         set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
02097     }else{
02098         decode_q_branch(s, level+1, 2*x+0, 2*y+0);
02099         decode_q_branch(s, level+1, 2*x+1, 2*y+0);
02100         decode_q_branch(s, level+1, 2*x+0, 2*y+1);
02101         decode_q_branch(s, level+1, 2*x+1, 2*y+1);
02102     }
02103 }
02104 
02105 static void encode_blocks(SnowContext *s, int search){
02106     int x, y;
02107     int w= s->b_width;
02108     int h= s->b_height;
02109 
02110     if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
02111         iterative_me(s);
02112 
02113     for(y=0; y<h; y++){
02114         if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
02115             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
02116             return;
02117         }
02118         for(x=0; x<w; x++){
02119             if(s->avctx->me_method == ME_ITER || !search)
02120                 encode_q_branch2(s, 0, x, y);
02121             else
02122                 encode_q_branch (s, 0, x, y);
02123         }
02124     }
02125 }
02126 
02127 static void decode_blocks(SnowContext *s){
02128     int x, y;
02129     int w= s->b_width;
02130     int h= s->b_height;
02131 
02132     for(y=0; y<h; y++){
02133         for(x=0; x<w; x++){
02134             decode_q_branch(s, 0, x, y);
02135         }
02136     }
02137 }
02138 
/**
 * Sub-pel motion compensation of one b_w x b_h block from src into dst.
 *
 * @param p      plane whose fast_mc / diag_mc / hcoeff settings select the
 *               filter; may be NULL, in which case the fixed (20,-5,1)
 *               filter and the two-plane blend are used
 * @param dst    destination, rows are stride bytes apart
 * @param src    source; the taps read up to HTAPS_MAX/2+3 samples to the
 *               right of / below the current position
 * @param tmp    not referenced by this function
 * @param stride row pitch shared by src, dst and the local uint8_t scratch
 * @param dx,dy  sub-pel offsets, both asserted to be < 16
 */
02139 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
    /* blend weight of the first plane, indexed by (dx&7) + 8*(dy&7);
     * the second plane gets 8 minus this value */
02140     const static uint8_t weight[64]={
02141     8,7,6,5,4,3,2,1,
02142     7,7,0,0,0,0,0,1,
02143     6,0,6,0,0,0,2,0,
02144     5,0,0,5,0,3,0,0,
02145     4,0,0,0,4,0,0,0,
02146     3,0,0,5,0,3,0,0,
02147     2,0,6,0,0,0,2,0,
02148     1,7,0,0,0,0,0,1,
02149     };
02150 
    /* indexed by dx + 16*dy; the high nibble (l) and low nibble (r) are
     * used below as indices into both needs[] and hpel[] */
02151     const static uint8_t brane[256]={
02152     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
02153     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
02154     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
02155     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
02156     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
02157     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
02158     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
02159     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
02160     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
02161     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
02162     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
02163     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
02164     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
02165     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
02166     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
02167     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
02168     };
02169 
    /* per hpel[] index: bitmask of the filter passes below that must run
     * before that plane can be read; entry 12 (nibble 0xc) requests all */
02170     const static uint8_t needs[16]={
02171     0,1,0,0,
02172     2,4,2,0,
02173     0,1,0,0,
02174     15
02175     };
02176 
02177     int x, y, b, r, l;
    /* tmpIt: unrounded horizontal filter sums, 64 entries per row.
     * tmp2t[0..2]: rounded 8-bit planes, stride bytes per row. */
02178     int16_t tmpIt   [64*(32+HTAPS_MAX)];
02179     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
02180     int16_t *tmpI= tmpIt;
02181     uint8_t *tmp2= tmp2t[0];
02182     uint8_t *hpel[11];
02183 START_TIMER
02184     assert(dx<16 && dy<16);
02185     r= brane[dx + 16*dy]&15;
02186     l= brane[dx + 16*dy]>>4;
02187 
02188     b= needs[l] | needs[r];
    /* without diag_mc every pass runs and the 4-plane blend below is used */
02189     if(p && !p->diag_mc)
02190         b= 15;
02191 
    /* horizontal pass over b_h+HTAPS_MAX-1 rows: fills tmpIt (raw sums)
     * and tmp2t[0] (rounded, saturated) */
02192     if(b&5){
02193         for(y=0; y < b_h+HTAPS_MAX-1; y++){
02194             for(x=0; x < b_w; x++){
02195                 int a_1=src[x + HTAPS_MAX/2-4];
02196                 int a0= src[x + HTAPS_MAX/2-3];
02197                 int a1= src[x + HTAPS_MAX/2-2];
02198                 int a2= src[x + HTAPS_MAX/2-1];
02199                 int a3= src[x + HTAPS_MAX/2+0];
02200                 int a4= src[x + HTAPS_MAX/2+1];
02201                 int a5= src[x + HTAPS_MAX/2+2];
02202                 int a6= src[x + HTAPS_MAX/2+3];
02203                 int am=0;
02204                 if(!p || p->fast_mc){
02205                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
02206                     tmpI[x]= am;
02207                     am= (am+16)>>5;
02208                 }else{
02209                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
02210                     tmpI[x]= am;
02211                     am= (am+32)>>6;
02212                 }
02213 
                /* saturate to 0..255 once stored as uint8_t: negative
                 * values become 0, values above 255 become ~0 */
02214                 if(am&(~255)) am= ~(am>>31);
02215                 tmp2[x]= am;
02216             }
02217             tmpI+= 64;
02218             tmp2+= stride;
02219             src += stride;
02220         }
        /* rewind src to the first row */
02221         src -= stride*y;
02222     }
02223     src += HTAPS_MAX/2 - 1;
02224     tmp2= tmp2t[1];
02225 
    /* vertical pass directly on src, b_w+1 columns wide: fills tmp2t[1] */
02226     if(b&2){
02227         for(y=0; y < b_h; y++){
02228             for(x=0; x < b_w+1; x++){
02229                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
02230                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
02231                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
02232                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
02233                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
02234                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
02235                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
02236                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
02237                 int am=0;
02238                 if(!p || p->fast_mc)
02239                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
02240                 else
02241                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
02242 
02243                 if(am&(~255)) am= ~(am>>31);
02244                 tmp2[x]= am;
02245             }
02246             src += stride;
02247             tmp2+= stride;
02248         }
02249         src -= stride*y;
02250     }
02251     src += stride*(HTAPS_MAX/2 - 1);
02252     tmp2= tmp2t[2];
02253     tmpI= tmpIt;
    /* vertical pass over the raw horizontal sums in tmpIt: fills tmp2t[2];
     * the larger rounding shift accounts for both filter stages */
02254     if(b&4){
02255         for(y=0; y < b_h; y++){
02256             for(x=0; x < b_w; x++){
02257                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
02258                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
02259                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
02260                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
02261                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
02262                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
02263                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
02264                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
02265                 int am=0;
02266                 if(!p || p->fast_mc)
02267                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
02268                 else
02269                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
02270                 if(am&(~255)) am= ~(am>>31);
02271                 tmp2[x]= am;
02272             }
02273             tmpI+= 64;
02274             tmp2+= stride;
02275         }
02276     }
02277 
    /* 3x3 grid of planes stored with a row pitch of 4; entries 3 and 7
     * are never assigned */
02278     hpel[ 0]= src;
02279     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
02280     hpel[ 2]= src + 1;
02281 
02282     hpel[ 4]= tmp2t[1];
02283     hpel[ 5]= tmp2t[2];
02284     hpel[ 6]= tmp2t[1] + 1;
02285 
02286     hpel[ 8]= src + stride;
02287     hpel[ 9]= hpel[1] + stride;
02288     hpel[10]= hpel[8] + 1;
02289 
    /* b==15: weighted blend of the four planes surrounding (dx, dy), with
     * weights derived from dx&7 and dy&7 that sum to 64 */
02290     if(b==15){
02291         uint8_t *src1= hpel[dx/8 + dy/8*4  ];
02292         uint8_t *src2= hpel[dx/8 + dy/8*4+1];
02293         uint8_t *src3= hpel[dx/8 + dy/8*4+4];
02294         uint8_t *src4= hpel[dx/8 + dy/8*4+5];
02295         dx&=7;
02296         dy&=7;
02297         for(y=0; y < b_h; y++){
02298             for(x=0; x < b_w; x++){
02299                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
02300                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
02301             }
02302             src1+=stride;
02303             src2+=stride;
02304             src3+=stride;
02305             src4+=stride;
02306             dst +=stride;
02307         }
02308     }else{
        /* otherwise: blend only the two planes selected through brane[];
         * note that this inner b shadows the pass mask declared above */
02309         uint8_t *src1= hpel[l];
02310         uint8_t *src2= hpel[r];
02311         int a= weight[((dx&7) + (8*(dy&7)))];
02312         int b= 8-a;
02313         for(y=0; y < b_h; y++){
02314             for(x=0; x < b_w; x++){
02315                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
02316             }
02317             src1+=stride;
02318             src2+=stride;
02319             dst +=stride;
02320         }
02321     }
02322 STOP_TIMER("mc_block")
02323 }
02324 
02325 #define mca(dx,dy,b_w)\
02326 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
02327     uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
02328     assert(h==b_w);\
02329     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
02330 }
02331 
02332 mca( 0, 0,16)
02333 mca( 8, 0,16)
02334 mca( 0, 8,16)
02335 mca( 8, 8,16)
02336 mca( 0, 0,8)
02337 mca( 8, 0,8)
02338 mca( 0, 8,8)
02339 mca( 8, 8,8)
02340 
02341 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
02342     if(block->type & BLOCK_INTRA){
02343         int x, y;
02344         const int color = block->color[plane_index];
02345         const int color4= color*0x01010101;
02346         if(b_w==32){
02347             for(y=0; y < b_h; y++){
02348                 *(uint32_t*)&dst[0 + y*stride]= color4;
02349                 *(uint32_t*)&dst[4 + y*stride]= color4;
02350                 *(uint32_t*)&dst[8 + y*stride]= color4;
02351                 *(uint32_t*)&dst[12+ y*stride]= color4;
02352                 *(uint32_t*)&dst[16+ y*stride]= color4;
02353                 *(uint32_t*)&dst[20+ y*stride]= color4;
02354                 *(uint32_t*)&dst[24+ y*stride]= color4;
02355                 *(uint32_t*)&dst[28+ y*stride]= color4;
02356             }
02357         }else if(b_w==16){
02358             for(y=0; y < b_h; y++){
02359                 *(uint32_t*)&dst[0 + y*stride]= color4;
02360                 *(uint32_t*)&dst[4 + y*stride]= color4;
02361                 *(uint32_t*)&dst[8 + y*stride]= color4;
02362                 *(uint32_t*)&dst[12+ y*stride]= color4;
02363             }
02364         }else if(b_w==8){
02365             for(y=0; y < b_h; y++){
02366                 *(uint32_t*)&dst[0 + y*stride]= color4;
02367                 *(uint32_t*)&dst[4 + y*stride]= color4;
02368             }
02369         }else if(b_w==4){
02370             for(y=0; y < b_h; y++){
02371                 *(uint32_t*)&dst[0 + y*stride]= color4;
02372             }
02373         }else{
02374             for(y=0; y < b_h; y++){
02375                 for(x=0; x < b_w; x++){
02376                     dst[x + y*stride]= color;
02377                 }
02378             }
02379         }
02380     }else{
02381         uint8_t *src= s->last_picture[block->ref].data[plane_index];
02382         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
02383         int mx= block->mx*scale;
02384         int my= block->my*scale;
02385         const int dx= mx&15;
02386         const int dy= my&15;
02387         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
02388         sx += (mx>>4) - (HTAPS_MAX/2-1);
02389         sy += (my>>4) - (HTAPS_MAX/2-1);
02390         src += sx + sy*stride;
02391         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
02392            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
02393             ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
02394             src= tmp + MB_SIZE;
02395         }
02396 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
02397 //        assert(!(b_w&(b_w-1)));
02398         assert(b_w>1 && b_h>1);
02399         assert(tab_index>=0 && tab_index<4 || b_w==32);
02400         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
02401             mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
02402         else if(b_w==32){
02403             int y;
02404             for(y=0; y<b_h; y+=16){
02405                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
02406                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
02407             }
02408         }else if(b_w==b_h)
02409             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
02410         else if(b_w==2*b_h){
02411             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
02412             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
02413         }else{
02414             assert(2*b_w==b_h);
02415             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
02416             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
02417         }
02418     }
02419 }
02420 
02421 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
02422                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
02423     int y, x;
02424     IDWTELEM * dst;
02425     for(y=0; y<b_h; y++){
02426         //FIXME ugly misuse of obmc_stride
02427         const uint8_t *obmc1= obmc + y*obmc_stride;
02428         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02429         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02430         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02431         dst = slice_buffer_get_line(sb, src_y + y);
02432         for(x=0; x<b_w; x++){
02433             int v=   obmc1[x] * block[3][x + y*src_stride]
02434                     +obmc2[x] * block[2][x + y*src_stride]
02435                     +obmc3[x] * block[1][x + y*src_stride]
02436                     +obmc4[x] * block[0][x + y*src_stride];
02437 
02438             v <<= 8 - LOG2_OBMC_MAX;
02439             if(FRAC_BITS != 8){
02440                 v >>= 8 - FRAC_BITS;
02441             }
02442             if(add){
02443                 v += dst[x + src_x];
02444                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02445                 if(v&(~255)) v= ~(v>>31);
02446                 dst8[x + y*src_stride] = v;
02447             }else{
02448                 dst[x + src_x] -= v;
02449             }
02450         }
02451     }
02452 }
02453 
02454 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Predicts one block-sized area from its (up to) four neighbouring
 * BlockNodes and blends the predictions with the OBMC weights.
 *
 * @param sliced      nonzero: blend through s->dsp.inner_add_yblock() into
 *                    the slice buffer sb; zero: blend into dst directly
 * @param dst         coefficient buffer for the non-sliced path, rows
 *                    dst_stride elements apart
 * @param dst8        8-bit output, written only when add is set
 * @param obmc        OBMC weight table, rows obmc_stride bytes apart
 * @param src_x,src_y top-left corner of the area; may be negative
 * @param b_w,b_h     size of the area before clipping to w x h
 * @param b_x,b_y     block coordinates of the top-left neighbour (lt)
 * @param add         nonzero: add prediction to dst, round, saturate and
 *                    store in dst8; zero: subtract prediction from dst
 * @param offset_dst  nonzero: dst is the plane origin and is advanced to
 *                    (src_x, src_y); zero: dst already points at the area
 */
02455 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
02456     const int b_width = s->b_width  << s->block_max_depth;
02457     const int b_height= s->b_height << s->block_max_depth;
02458     const int b_stride= b_width;
02459     BlockNode *lt= &s->block[b_x + b_y*b_stride];
02460     BlockNode *rt= lt+1;
02461     BlockNode *lb= lt+b_stride;
02462     BlockNode *rb= lb+1;
02463     uint8_t *block[4];
02464     int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
02465     uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
02466     uint8_t *ptmp;
02467     int x,y;
02468 
    /* at the borders of the block grid, replace the missing neighbours by
     * the existing ones */
02469     if(b_x<0){
02470         lt= rt;
02471         lb= rb;
02472     }else if(b_x + 1 >= b_width){
02473         rt= lt;
02474         rb= lb;
02475     }
02476     if(b_y<0){
02477         lt= lb;
02478         rt= rb;
02479     }else if(b_y + 1 >= b_height){
02480         lb= lt;
02481         rb= rt;
02482     }
02483 
    /* clip the area to the plane, moving obmc (and dst when it is
     * area-relative) along with the clipped origin */
02484     if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
02485         obmc -= src_x;
02486         b_w += src_x;
02487         if(!sliced && !offset_dst)
02488             dst -= src_x;
02489         src_x=0;
02490     }else if(src_x + b_w > w){
02491         b_w = w - src_x;
02492     }
02493     if(src_y<0){
02494         obmc -= src_y*obmc_stride;
02495         b_h += src_y;
02496         if(!sliced && !offset_dst)
02497             dst -= src_y*dst_stride;
02498         src_y=0;
02499     }else if(src_y + b_h> h){
02500         b_h = h - src_y;
02501     }
02502 
    /* nothing left after clipping */
02503     if(b_w<=0 || b_h<=0) return;
02504 
02505 assert(src_stride > 2*MB_SIZE + 5);
02506     if(!sliced && offset_dst)
02507         dst += src_x + src_y*dst_stride;
02508     dst8+= src_x + src_y*src_stride;
02509 //    src += src_x + src_y*src_stride;
02510 
    /* the start of tmp is handed to pred_block() as scratch; the
     * predictions are stored from tmp + 3*tmp_step on, tmp_step apart.
     * Neighbours that same_block() reports as equal share one prediction. */
02511     ptmp= tmp + 3*tmp_step;
02512     block[0]= ptmp;
02513     ptmp+=tmp_step;
02514     pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
02515 
02516     if(same_block(lt, rt)){
02517         block[1]= block[0];
02518     }else{
02519         block[1]= ptmp;
02520         ptmp+=tmp_step;
02521         pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
02522     }
02523 
02524     if(same_block(lt, lb)){
02525         block[2]= block[0];
02526     }else if(same_block(rt, lb)){
02527         block[2]= block[1];
02528     }else{
02529         block[2]= ptmp;
02530         ptmp+=tmp_step;
02531         pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
02532     }
02533 
02534     if(same_block(lt, rb) ){
02535         block[3]= block[0];
02536     }else if(same_block(rt, rb)){
02537         block[3]= block[1];
02538     }else if(same_block(lb, rb)){
02539         block[3]= block[2];
02540     }else{
02541         block[3]= ptmp;
02542         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
02543     }
    /* disabled variant that accumulates one prediction at a time */
02544 #if 0
02545     for(y=0; y<b_h; y++){
02546         for(x=0; x<b_w; x++){
02547             int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
02548             if(add) dst[x + y*dst_stride] += v;
02549             else    dst[x + y*dst_stride] -= v;
02550         }
02551     }
02552     for(y=0; y<b_h; y++){
02553         uint8_t *obmc2= obmc + (obmc_stride>>1);
02554         for(x=0; x<b_w; x++){
02555             int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
02556             if(add) dst[x + y*dst_stride] += v;
02557             else    dst[x + y*dst_stride] -= v;
02558         }
02559     }
02560     for(y=0; y<b_h; y++){
02561         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02562         for(x=0; x<b_w; x++){
02563             int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
02564             if(add) dst[x + y*dst_stride] += v;
02565             else    dst[x + y*dst_stride] -= v;
02566         }
02567     }
02568     for(y=0; y<b_h; y++){
02569         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02570         uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02571         for(x=0; x<b_w; x++){
02572             int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
02573             if(add) dst[x + y*dst_stride] += v;
02574             else    dst[x + y*dst_stride] -= v;
02575         }
02576     }
02577 #else
02578     if(sliced){
02579         START_TIMER
02580 
02581         s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
02582         STOP_TIMER("inner_add_yblock")
02583     }else
    /* non-sliced path: same per-pixel blend as ff_snow_inner_add_yblock(),
     * but addressing dst with dst_stride instead of slice buffer lines */
02584     for(y=0; y<b_h; y++){
02585         //FIXME ugly misuse of obmc_stride
02586         const uint8_t *obmc1= obmc + y*obmc_stride;
02587         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02588         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02589         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02590         for(x=0; x<b_w; x++){
02591             int v=   obmc1[x] * block[3][x + y*src_stride]
02592                     +obmc2[x] * block[2][x + y*src_stride]
02593                     +obmc3[x] * block[1][x + y*src_stride]
02594                     +obmc4[x] * block[0][x + y*src_stride];
02595 
02596             v <<= 8 - LOG2_OBMC_MAX;
02597             if(FRAC_BITS != 8){
02598                 v >>= 8 - FRAC_BITS;
02599             }
02600             if(add){
02601                 v += dst[x + y*dst_stride];
02602                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02603                 if(v&(~255)) v= ~(v>>31);
02604                 dst8[x + y*src_stride] = v;
02605             }else{
02606                 dst[x + y*dst_stride] -= v;
02607             }
02608         }
02609     }
02610 #endif /* 0 */
02611 }
02612 
02613 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
02614     Plane *p= &s->plane[plane_index];
02615     const int mb_w= s->b_width  << s->block_max_depth;
02616     const int mb_h= s->b_height << s->block_max_depth;
02617     int x, y, mb_x;
02618     int block_size = MB_SIZE >> s->block_max_depth;
02619     int block_w    = plane_index ? block_size/2 : block_size;
02620     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02621     int obmc_stride= plane_index ? block_size : 2*block_size;
02622     int ref_stride= s->current_picture.linesize[plane_index];
02623     uint8_t *dst8= s->current_picture.data[plane_index];
02624     int w= p->width;
02625     int h= p->height;
02626     START_TIMER
02627 
02628     if(s->keyframe || (s->avctx->debug&512)){
02629         if(mb_y==mb_h)
02630             return;
02631 
02632         if(add){
02633             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
02634             {
02635 //                DWTELEM * line = slice_buffer_get_line(sb, y);
02636                 IDWTELEM * line = sb->line[y];
02637                 for(x=0; x<w; x++)
02638                 {
02639 //                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02640                     int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02641                     v >>= FRAC_BITS;
02642                     if(v&(~255)) v= ~(v>>31);
02643                     dst8[x + y*ref_stride]= v;
02644                 }
02645             }
02646         }else{
02647             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
02648             {
02649 //                DWTELEM * line = slice_buffer_get_line(sb, y);
02650                 IDWTELEM * line = sb->line[y];
02651                 for(x=0; x<w; x++)
02652                 {
02653                     line[x] -= 128 << FRAC_BITS;
02654 //                    buf[x + y*w]-= 128<<FRAC_BITS;
02655                 }
02656             }
02657         }
02658 
02659         return;
02660     }
02661 
02662         for(mb_x=0; mb_x<=mb_w; mb_x++){
02663             START_TIMER
02664 
02665             add_yblock(s, 1, sb, old_buffer, dst8, obmc,
02666                        block_w*mb_x - block_w/2,
02667                        block_w*mb_y - block_w/2,
02668                        block_w, block_w,
02669                        w, h,
02670                        w, ref_stride, obmc_stride,
02671                        mb_x - 1, mb_y - 1,
02672                        add, 0, plane_index);
02673 
02674             STOP_TIMER("add_yblock")
02675         }
02676 
02677     STOP_TIMER("predict_slice")
02678 }
02679 
02680 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
02681     Plane *p= &s->plane[plane_index];
02682     const int mb_w= s->b_width  << s->block_max_depth;
02683     const int mb_h= s->b_height << s->block_max_depth;
02684     int x, y, mb_x;
02685     int block_size = MB_SIZE >> s->block_max_depth;
02686     int block_w    = plane_index ? block_size/2 : block_size;
02687     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02688     const int obmc_stride= plane_index ? block_size : 2*block_size;
02689     int ref_stride= s->current_picture.linesize[plane_index];
02690     uint8_t *dst8= s->current_picture.data[plane_index];
02691     int w= p->width;
02692     int h= p->height;
02693     START_TIMER
02694 
02695     if(s->keyframe || (s->avctx->debug&512)){
02696         if(mb_y==mb_h)
02697             return;
02698 
02699         if(add){
02700             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02701                 for(x=0; x<w; x++){
02702                     int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02703                     v >>= FRAC_BITS;
02704                     if(v&(~255)) v= ~(v>>31);
02705                     dst8[x + y*ref_stride]= v;
02706                 }
02707             }
02708         }else{
02709             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02710                 for(x=0; x<w; x++){
02711                     buf[x + y*w]-= 128<<FRAC_BITS;
02712                 }
02713             }
02714         }
02715 
02716         return;
02717     }
02718 
02719         for(mb_x=0; mb_x<=mb_w; mb_x++){
02720             START_TIMER
02721 
02722             add_yblock(s, 0, NULL, buf, dst8, obmc,
02723                        block_w*mb_x - block_w/2,
02724                        block_w*mb_y - block_w/2,
02725                        block_w, block_w,
02726                        w, h,
02727                        w, ref_stride, obmc_stride,
02728                        mb_x - 1, mb_y - 1,
02729                        add, 1, plane_index);
02730 
02731             STOP_TIMER("add_yblock")
02732         }
02733 
02734     STOP_TIMER("predict_slice")
02735 }
02736 
02737 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
02738     const int mb_h= s->b_height << s->block_max_depth;
02739     int mb_y;
02740     for(mb_y=0; mb_y<=mb_h; mb_y++)
02741         predict_slice(s, buf, plane_index, add, mb_y);
02742 }
02743 
/**
 * Estimates the intra color of block (mb_x, mb_y) for one plane.
 *
 * The block is temporarily turned into an intra block of color 0, the
 * prediction of the four overlapping areas is accumulated into a scratch
 * buffer, and the color is then fitted to the difference between the input
 * picture and that prediction, weighted by the OBMC window. The block is
 * restored before returning.
 *
 * @return the fitted color, clipped to 0..255
 */
02744 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
02745     int i, x2, y2;
02746     Plane *p= &s->plane[plane_index];
02747     const int block_size = MB_SIZE >> s->block_max_depth;
02748     const int block_w    = plane_index ? block_size/2 : block_size;
02749     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02750     const int obmc_stride= plane_index ? block_size : 2*block_size;
02751     const int ref_stride= s->current_picture.linesize[plane_index];
02752     uint8_t *src= s-> input_picture.data[plane_index];
02753     IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
02754     const int b_stride = s->b_width << s->block_max_depth;
02755     const int w= p->width;
02756     const int h= p->height;
02757     int index= mb_x + mb_y*b_stride;
02758     BlockNode *b= &s->block[index];
02759     BlockNode backup= *b;
    /* ab: sum of residual*weight, aa: sum of weight*weight */
02760     int ab=0;
02761     int aa=0;
02762 
    /* make this block contribute nothing to the prediction below */
02763     b->type|= BLOCK_INTRA;
02764     b->color[plane_index]= 0;
02765     memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
02766 
    /* the four block_w x block_w areas covered by this block's window */
02767     for(i=0; i<4; i++){
02768         int mb_x2= mb_x + (i &1) - 1;
02769         int mb_y2= mb_y + (i>>1) - 1;
02770         int x= block_w*mb_x2 + block_w/2;
02771         int y= block_w*mb_y2 + block_w/2;
02772 
        /* add=0: the prediction is subtracted from the zeroed scratch, so
         * dst holds the negated prediction afterwards */
02773         add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
02774                     x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
02775 
02776         for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
02777             for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
                /* position inside the window; shadows the outer index */
02778                 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
02779                 int obmc_v= obmc[index];
02780                 int d;
                /* where the area sticks out of the plane, also add the
                 * weight of the window position one block further in */
02781                 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
02782                 if(x<0) obmc_v += obmc[index + block_w];
02783                 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
02784                 if(x+block_w>w) obmc_v += obmc[index - block_w];
02785                 //FIXME precalc this or simplify it somehow else
02786 
                /* d: prediction of the other blocks plus rounding offset */
02787                 d = -dst[index] + (1<<(FRAC_BITS-1));
02788                 dst[index] = d;
02789                 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
02790                 aa += obmc_v * obmc_v; //FIXME precalclate this
02791             }
02792         }
02793     }
    /* undo the temporary intra/color change */
02794     *b= backup;
02795 
    /* NOTE(review): divides by aa; presumably at least one pixel is always
     * visited so that aa != 0 -- confirm against callers */
02796     return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
02797 }
02798 
02799 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
02800     const int b_stride = s->b_width << s->block_max_depth;
02801     const int b_height = s->b_height<< s->block_max_depth;
02802     int index= x + y*b_stride;
02803     const BlockNode *b     = &s->block[index];
02804     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
02805     const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
02806     const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
02807     const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
02808     int dmx, dmy;
02809 //  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
02810 //  int my_context= av_log2(2*FFABS(left->my - top->my));
02811 
02812     if(x<0 || x>=b_stride || y>=b_height)
02813         return 0;
02814 /*
02815 1            0      0
02816 01X          1-2    1
02817 001XX        3-6    2-3
02818 0001XXX      7-14   4-7
02819 00001XXXX   15-30   8-15
02820 */
02821 //FIXME try accurate rate
02822 //FIXME intra and inter predictors if surrounding blocks arent the same type
02823     if(b->type & BLOCK_INTRA){
02824         return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
02825                    + av_log2(2*FFABS(left->color[1] - b->color[1]))
02826                    + av_log2(2*FFABS(left->color[2] - b->color[2])));
02827     }else{
02828         pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
02829         dmx-= b->mx;
02830         dmy-= b->my;
02831         return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
02832                     + av_log2(2*FFABS(dmy))
02833                     + av_log2(2*b->ref));
02834     }
02835 }
02836 
/**
 * Computes the rate-distortion score of block (mb_x, mb_y) for one plane.
 *
 * The block's own prediction is rendered, weighted with obmc_edged and
 * combined with the contribution stored in the OBMC scratchpad; the result
 * is written into the current picture and compared against the input
 * picture. For plane 0 the estimated bit cost of the affected blocks is
 * added, scaled by the penalty factor.
 *
 * @param obmc_edged weight window for this block, rows obmc_stride apart
 * @return distortion + rate*penalty_factor
 */
02837 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
02838     Plane *p= &s->plane[plane_index];
02839     const int block_size = MB_SIZE >> s->block_max_depth;
02840     const int block_w    = plane_index ? block_size/2 : block_size;
02841     const int obmc_stride= plane_index ? block_size : 2*block_size;
02842     const int ref_stride= s->current_picture.linesize[plane_index];
02843     uint8_t *dst= s->current_picture.data[plane_index];
02844     uint8_t *src= s->  input_picture.data[plane_index];
02845     IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02846     uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
02847     uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
02848     const int b_stride = s->b_width << s->block_max_depth;
02849     const int b_height = s->b_height<< s->block_max_depth;
02850     const int w= p->width;
02851     const int h= p->height;
02852     int distortion;
02853     int rate= 0;
02854     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
    /* (sx, sy): top-left of the 2*block_w window; x0..x1 / y0..y1: the part
     * of the window that lies inside the plane */
02855     int sx= block_w*mb_x - block_w/2;
02856     int sy= block_w*mb_y - block_w/2;
02857     int x0= FFMAX(0,-sx);
02858     int y0= FFMAX(0,-sy);
02859     int x1= FFMIN(block_w*2, w-sx);
02860     int y1= FFMIN(block_w*2, h-sy);
02861     int i,x,y;
02862 
    /* prediction of this block alone over the whole window */
02863     pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
02864 
    /* weight it, add the stored contribution in pred, scale down by
     * FRAC_BITS, saturate and write into the current picture */
02865     for(y=y0; y<y1; y++){
02866         const uint8_t *obmc1= obmc_edged + y*obmc_stride;
02867         const IDWTELEM *pred1 = pred + y*obmc_stride;
02868         uint8_t *cur1 = cur + y*ref_stride;
02869         uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
02870         for(x=x0; x<x1; x++){
02871 #if FRAC_BITS >= LOG2_OBMC_MAX
02872             int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
02873 #else
02874             int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
02875 #endif
02876             v = (v + pred1[x]) >> FRAC_BITS;
02877             if(v&(~255)) v= ~(v>>31);
02878             dst1[x] = v;
02879         }
02880     }
02881 
    /* corner blocks only: overwrite one block_w x block_w quadrant with the
     * unweighted prediction */
02882     /* copy the regions where obmc[] = (uint8_t)256 */
02883     if(LOG2_OBMC_MAX == 8
02884         && (mb_x == 0 || mb_x == b_stride-1)
02885         && (mb_y == 0 || mb_y == b_height-1)){
02886         if(mb_x == 0)
02887             x1 = block_w;
02888         else
02889             x0 = block_w;
02890         if(mb_y == 0)
02891             y1 = block_w;
02892         else
02893             y0 = block_w;
02894         for(y=y0; y<y1; y++)
02895             memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
02896     }
02897 
    /* distortion between input and reconstructed picture over the window */
02898     if(block_w==16){
02899         /* FIXME rearrange dsputil to fit 32x32 cmp functions */
02900         /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
02901         /* FIXME cmps overlap but don't cover the wavelet's whole support,
02902          * so improving the score of one block is not strictly guaranteed to
02903          * improve the score of the whole frame, so iterative motion est
02904          * doesn't always converge. */
02905         if(s->avctx->me_cmp == FF_CMP_W97)
02906             distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02907         else if(s->avctx->me_cmp == FF_CMP_W53)
02908             distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02909         else{
            /* sum the 16x16 compare over the four quadrants */
02910             distortion = 0;
02911             for(i=0; i<4; i++){
02912                 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
02913                 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
02914             }
02915         }
02916     }else{
02917         assert(block_w==8);
02918         distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
02919     }
02920 
    /* rate is only counted once, on plane 0 */
02921     if(plane_index==0){
02922         for(i=0; i<4; i++){
02923 /* ..RRr
02924  * .RXx.
02925  * rxx..
02926  */
02927             rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
02928         }
02929         if(mb_x == b_stride-2)
02930             rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
02931     }
02932     return distortion + rate*penalty_factor;
02933 }
02934 
02935 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
02936     int i, y2;
02937     Plane *p= &s->plane[plane_index];
02938     const int block_size = MB_SIZE >> s->block_max_depth;
02939     const int block_w    = plane_index ? block_size/2 : block_size;
02940     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02941     const int obmc_stride= plane_index ? block_size : 2*block_size;
02942     const int ref_stride= s->current_picture.linesize[plane_index];
02943     uint8_t *dst= s->current_picture.data[plane_index];
02944     uint8_t *src= s-> input_picture.data[plane_index];
02945     static const IDWTELEM zero_dst[4096]; //FIXME
02946     const int b_stride = s->b_width << s->block_max_depth;
02947     const int w= p->width;
02948     const int h= p->height;
02949     int distortion= 0;
02950     int rate= 0;
02951     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02952 
02953     for(i=0; i<9; i++){
02954         int mb_x2= mb_x + (i%3) - 1;
02955         int mb_y2= mb_y + (i/3) - 1;
02956         int x= block_w*mb_x2 + block_w/2;
02957         int y= block_w*mb_y2 + block_w/2;
02958 
02959         add_yblock(s, 0, NULL, zero_dst, dst, obmc,
02960                    x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
02961 
02962         //FIXME find a cleaner/simpler way to skip the outside stuff
02963         for(y2= y; y2<0; y2++)
02964             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02965         for(y2= h; y2<y+block_w; y2++)
02966             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02967         if(x<0){
02968             for(y2= y; y2<y+block_w; y2++)
02969                 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
02970         }
02971         if(x+block_w > w){
02972             for(y2= y; y2<y+block_w; y2++)
02973                 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
02974         }
02975 
02976         assert(block_w== 8 || block_w==16);
02977         distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
02978     }
02979 
02980     if(plane_index==0){
02981         BlockNode *b= &s->block[mb_x+mb_y*b_stride];
02982         int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
02983 
02984 /* ..RRRr
02985  * .RXXx.
02986  * .RXXx.
02987  * rxxx.
02988  */
02989         if(merged)
02990             rate = get_block_bits(s, mb_x, mb_y, 2);
02991         for(i=merged?4:0; i<9; i++){
02992             static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
02993             rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
02994         }
02995     }
02996     return distortion + rate*penalty_factor;
02997 }
02998 
02999 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
03000     const int b_stride= s->b_width << s->block_max_depth;
03001     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
03002     BlockNode backup= *block;
03003     int rd, index, value;
03004 
03005     assert(mb_x>=0 && mb_y>=0);
03006     assert(mb_x<b_stride);
03007 
03008     if(intra){
03009         block->color[0] = p[0];
03010         block->color[1] = p[1];
03011         block->color[2] = p[2];
03012         block->type |= BLOCK_INTRA;
03013     }else{
03014         index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
03015         value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
03016         if(s->me_cache[index] == value)
03017             return 0;
03018         s->me_cache[index]= value;
03019 
03020         block->mx= p[0];
03021         block->my= p[1];
03022         block->type &= ~BLOCK_INTRA;
03023     }
03024 
03025     rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
03026 
03027 //FIXME chroma
03028     if(rd < *best_rd){
03029         *best_rd= rd;
03030         return 1;
03031     }else{
03032         *block= backup;
03033         return 0;
03034     }
03035 }
03036 
03037 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
03038 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
03039     int p[2] = {p0, p1};
03040     return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
03041 }
03042 
03043 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
03044     const int b_stride= s->b_width << s->block_max_depth;
03045     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
03046     BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
03047     int rd, index, value;
03048 
03049     assert(mb_x>=0 && mb_y>=0);
03050     assert(mb_x<b_stride);
03051     assert(((mb_x|mb_y)&1) == 0);
03052 
03053     index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
03054     value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
03055     if(s->me_cache[index] == value)
03056         return 0;
03057     s->me_cache[index]= value;
03058 
03059     block->mx= p0;
03060     block->my= p1;
03061     block->ref= ref;
03062     block->type &= ~BLOCK_INTRA;
03063     block[1]= block[b_stride]= block[b_stride+1]= *block;
03064 
03065     rd= get_4block_rd(s, mb_x, mb_y, 0);
03066 
03067 //FIXME chroma
03068     if(rd < *best_rd){
03069         *best_rd= rd;
03070         return 1;
03071     }else{
03072         block[0]= backup[0];
03073         block[1]= backup[1];
03074         block[b_stride]= backup[2];
03075         block[b_stride+1]= backup[3];
03076         return 0;
03077     }
03078 }
03079 
03080 static void iterative_me(SnowContext *s){
03081     int pass, mb_x, mb_y;
03082     const int b_width = s->b_width  << s->block_max_depth;
03083     const int b_height= s->b_height << s->block_max_depth;
03084     const int b_stride= b_width;
03085     int color[3];
03086 
03087     {
03088         RangeCoder r = s->c;
03089         uint8_t state[sizeof(s->block_state)];
03090         memcpy(state, s->block_state, sizeof(s->block_state));
03091         for(mb_y= 0; mb_y<s->b_height; mb_y++)
03092             for(mb_x= 0; mb_x<s->b_width; mb_x++)
03093                 encode_q_branch(s, 0, mb_x, mb_y);
03094         s->c = r;
03095         memcpy(s->block_state, state, sizeof(s->block_state));
03096     }
03097 
03098     for(pass=0; pass<25; pass++){
03099         int change= 0;
03100 
03101         for(mb_y= 0; mb_y<b_height; mb_y++){
03102             for(mb_x= 0; mb_x<b_width; mb_x++){
03103                 int dia_change, i, j, ref;
03104                 int best_rd= INT_MAX, ref_rd;
03105                 BlockNode backup, ref_b;
03106                 const int index= mb_x + mb_y * b_stride;
03107                 BlockNode *block= &s->block[index];
03108                 BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
03109                 BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
03110                 BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
03111                 BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
03112                 BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
03113                 BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
03114                 BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
03115                 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
03116                 const int b_w= (MB_SIZE >> s->block_max_depth);
03117                 uint8_t obmc_edged[b_w*2][b_w*2];
03118 
03119                 if(pass && (block->type & BLOCK_OPT))
03120                     continue;
03121                 block->type |= BLOCK_OPT;
03122 
03123                 backup= *block;
03124 
03125                 if(!s->me_cache_generation)
03126                     memset(s->me_cache, 0, sizeof(s->me_cache));
03127                 s->me_cache_generation += 1<<22;
03128 
03129                 //FIXME precalc
03130                 {
03131                     int x, y;
03132                     memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
03133                     if(mb_x==0)
03134                         for(y=0; y<b_w*2; y++)
03135                             memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
03136                     if(mb_x==b_stride-1)
03137                         for(y=0; y<b_w*2; y++)
03138                             memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
03139                     if(mb_y==0){
03140                         for(x=0; x<b_w*2; x++)
03141                             obmc_edged[0][x] += obmc_edged[b_w-1][x];
03142                         for(y=1; y<b_w; y++)
03143                             memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
03144                     }
03145                     if(mb_y==b_height-1){
03146                         for(x=0; x<b_w*2; x++)
03147                             obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
03148                         for(y=b_w; y<b_w*2-1; y++)
03149                             memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
03150                     }
03151                 }
03152 
03153                 //skip stuff outside the picture
03154                 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
03155                 {
03156                     uint8_t *src= s->  input_picture.data[0];
03157                     uint8_t *dst= s->current_picture.data[0];
03158                     const int stride= s->current_picture.linesize[0];
03159                     const int block_w= MB_SIZE >> s->block_max_depth;
03160                     const int sx= block_w*mb_x - block_w/2;
03161                     const int sy= block_w*mb_y - block_w/2;
03162                     const int w= s->plane[0].width;
03163                     const int h= s->plane[0].height;
03164                     int y;
03165 
03166                     for(y=sy; y<0; y++)
03167                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03168                     for(y=h; y<sy+block_w*2; y++)
03169                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03170                     if(sx<0){
03171                         for(y=sy; y<sy+block_w*2; y++)
03172                             memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
03173                     }
03174                     if(sx+block_w*2 > w){
03175                         for(y=sy; y<sy+block_w*2; y++)
03176                             memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
03177                     }
03178                 }
03179 
03180                 // intra(black) = neighbors' contribution to the current block
03181                 for(i=0; i<3; i++)
03182                     color[i]= get_dc(s, mb_x, mb_y, i);
03183 
03184                 // get previous score (cannot be cached due to OBMC)
03185                 if(pass > 0 && (block->type&BLOCK_INTRA)){
03186                     int color0[3]= {block->color[0], block->color[1], block->color[2]};
03187                     check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
03188                 }else
03189                     check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
03190 
03191                 ref_b= *block;
03192                 ref_rd= best_rd;
03193                 for(ref=0; ref < s->ref_frames; ref++){
03194                     int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
03195                     if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
03196                         continue;
03197                     block->ref= ref;
03198                     best_rd= INT_MAX;
03199 
03200                     check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
03201                     check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
03202                     if(tb)
03203                         check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
03204                     if(lb)
03205                         check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
03206                     if(rb)
03207                         check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
03208                     if(bb)
03209                         check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
03210 
03211                     /* fullpel ME */
03212                     //FIXME avoid subpel interpol / round to nearest integer
03213                     do{
03214                         dia_change=0;
03215                         for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
03216                             for(j=0; j<i; j++){
03217                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03218                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03219                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03220                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03221                             }
03222                         }
03223                     }while(dia_change);
03224                     /* subpel ME */
03225                     do{
03226                         static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
03227                         dia_change=0;
03228                         for(i=0; i<8; i++)
03229                             dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
03230                     }while(dia_change);
03231                     //FIXME or try the standard 2 pass qpel or similar
03232 
03233                     mvr[0][0]= block->mx;
03234                     mvr[0][1]= block->my;
03235                     if(ref_rd > best_rd){
03236                         ref_rd= best_rd;
03237                         ref_b= *block;
03238                     }
03239                 }
03240                 best_rd= ref_rd;
03241                 *block= ref_b;
03242 #if 1
03243                 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
03244                 //FIXME RD style color selection
03245 #endif
03246                 if(!same_block(block, &backup)){
03247                     if(tb ) tb ->type &= ~BLOCK_OPT;
03248                     if(lb ) lb ->type &= ~BLOCK_OPT;
03249                     if(rb ) rb ->type &= ~BLOCK_OPT;
03250                     if(bb ) bb ->type &= ~BLOCK_OPT;
03251                     if(tlb) tlb->type &= ~BLOCK_OPT;
03252                     if(trb) trb->type &= ~BLOCK_OPT;
03253                     if(blb) blb->type &= ~BLOCK_OPT;
03254                     if(brb) brb->type &= ~BLOCK_OPT;
03255                     change ++;
03256                 }
03257             }
03258         }
03259         av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
03260         if(!change)
03261             break;
03262     }
03263 
03264     if(s->block_max_depth == 1){
03265         int change= 0;
03266         for(mb_y= 0; mb_y<b_height; mb_y+=2){
03267             for(mb_x= 0; mb_x<b_width; mb_x+=2){
03268                 int i;
03269                 int best_rd, init_rd;
03270                 const int index= mb_x + mb_y * b_stride;
03271                 BlockNode *b[4];
03272 
03273                 b[0]= &s->block[index];
03274                 b[1]= b[0]+1;
03275                 b[2]= b[0]+b_stride;
03276                 b[3]= b[2]+1;
03277                 if(same_block(b[0], b[1]) &&
03278                    same_block(b[0], b[2]) &&
03279                    same_block(b[0], b[3]))
03280                     continue;
03281 
03282                 if(!s->me_cache_generation)
03283                     memset(s->me_cache, 0, sizeof(s->me_cache));
03284                 s->me_cache_generation += 1<<22;
03285 
03286                 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
03287 
03288                 //FIXME more multiref search?
03289                 check_4block_inter(s, mb_x, mb_y,
03290                                    (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
03291                                    (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
03292 
03293                 for(i=0; i<4; i++)
03294                     if(!(b[i]->type&BLOCK_INTRA))
03295                         check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
03296 
03297                 if(init_rd != best_rd)
03298                     change++;
03299             }
03300         }
03301         av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
03302     }
03303 }
03304 
03305 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
03306     const int level= b->level;
03307     const int w= b->width;
03308     const int h= b->height;
03309     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03310     const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
03311     int x,y, thres1, thres2;
03312 //    START_TIMER
03313 
03314     if(s->qlog == LOSSLESS_QLOG){
03315         for(y=0; y<h; y++)
03316             for(x=0; x<w; x++)
03317                 dst[x + y*stride]= src[x + y*stride];
03318         return;
03319     }
03320 
03321     bias= bias ? 0 : (3*qmul)>>3;
03322     thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
03323     thres2= 2*thres1;
03324 
03325     if(!bias){
03326         for(y=0; y<h; y++){
03327             for(x=0; x<w; x++){
03328                 int i= src[x + y*stride];
03329 
03330                 if((unsigned)(i+thres1) > thres2){
03331                     if(i>=0){
03332                         i<<= QEXPSHIFT;
03333                         i/= qmul; //FIXME optimize
03334                         dst[x + y*stride]=  i;
03335                     }else{
03336                         i= -i;
03337                         i<<= QEXPSHIFT;
03338                         i/= qmul; //FIXME optimize
03339                         dst[x + y*stride]= -i;
03340                     }
03341                 }else
03342                     dst[x + y*stride]= 0;
03343             }
03344         }
03345     }else{
03346         for(y=0; y<h; y++){
03347             for(x=0; x<w; x++){
03348                 int i= src[x + y*stride];
03349 
03350                 if((unsigned)(i+thres1) > thres2){
03351                     if(i>=0){
03352                         i<<= QEXPSHIFT;
03353                         i= (i + bias) / qmul; //FIXME optimize
03354                         dst[x + y*stride]=  i;
03355                     }else{
03356                         i= -i;
03357                         i<<= QEXPSHIFT;
03358                         i= (i + bias) / qmul; //FIXME optimize
03359                         dst[x + y*stride]= -i;
03360                     }
03361                 }else
03362                     dst[x + y*stride]= 0;
03363             }
03364         }
03365     }
03366     if(level+1 == s->spatial_decomposition_count){
03367 //        STOP_TIMER("quantize")
03368     }
03369 }
03370 
03371 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
03372     const int w= b->width;
03373     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03374     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03375     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03376     int x,y;
03377     START_TIMER
03378 
03379     if(s->qlog == LOSSLESS_QLOG) return;
03380 
03381     for(y=start_y; y<end_y; y++){
03382 //        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
03383         IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03384         for(x=0; x<w; x++){
03385             int i= line[x];
03386             if(i<0){
03387                 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
03388             }else if(i>0){
03389                 line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
03390             }
03391         }
03392     }
03393     if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
03394         STOP_TIMER("dquant")
03395     }
03396 }
03397 
03398 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
03399     const int w= b->width;
03400     const int h= b->height;
03401     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03402     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03403     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03404     int x,y;
03405     START_TIMER
03406 
03407     if(s->qlog == LOSSLESS_QLOG) return;
03408 
03409     for(y=0; y<h; y++){
03410         for(x=0; x<w; x++){
03411             int i= src[x + y*stride];
03412             if(i<0){
03413                 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
03414             }else if(i>0){
03415                 src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
03416             }
03417         }
03418     }
03419     if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
03420         STOP_TIMER("dquant")
03421     }
03422 }
03423 
03424 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03425     const int w= b->width;
03426     const int h= b->height;
03427     int x,y;
03428 
03429     for(y=h-1; y>=0; y--){
03430         for(x=w-1; x>=0; x--){
03431             int i= x + y*stride;
03432 
03433             if(x){
03434                 if(use_median){
03435                     if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03436                     else  src[i] -= src[i - 1];
03437                 }else{
03438                     if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03439                     else  src[i] -= src[i - 1];
03440                 }
03441             }else{
03442                 if(y) src[i] -= src[i - stride];
03443             }
03444         }
03445     }
03446 }
03447 
03448 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
03449     const int w= b->width;
03450     int x,y;
03451 
03452 //    START_TIMER
03453 
03454     IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
03455     IDWTELEM * prev;
03456 
03457     if (start_y != 0)
03458         line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03459 
03460     for(y=start_y; y<end_y; y++){
03461         prev = line;
03462 //        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
03463         line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03464         for(x=0; x<w; x++){
03465             if(x){
03466                 if(use_median){
03467                     if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
03468                     else  line[x] += line[x - 1];
03469                 }else{
03470                     if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
03471                     else  line[x] += line[x - 1];
03472                 }
03473             }else{
03474                 if(y) line[x] += prev[x];
03475             }
03476         }
03477     }
03478 
03479 //    STOP_TIMER("correlate")
03480 }
03481 
03482 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03483     const int w= b->width;
03484     const int h= b->height;
03485     int x,y;
03486 
03487     for(y=0; y<h; y++){
03488         for(x=0; x<w; x++){
03489             int i= x + y*stride;
03490 
03491             if(x){
03492                 if(use_median){
03493                     if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03494                     else  src[i] += src[i - 1];
03495                 }else{
03496                     if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03497                     else  src[i] += src[i - 1];
03498                 }
03499             }else{
03500                 if(y) src[i] += src[i - stride];
03501             }
03502         }
03503     }
03504 }
03505 
03506 static void encode_qlogs(SnowContext *s){
03507     int plane_index, level, orientation;
03508 
03509     for(plane_index=0; plane_index<2; plane_index++){
03510         for(level=0; level<s->spatial_decomposition_count; level++){
03511             for(orientation=level ? 1:0; orientation<4; orientation++){
03512                 if(orientation==2) continue;
03513                 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
03514             }
03515         }
03516     }
03517 }
03518 
03519 static void encode_header(SnowContext *s){
03520     int plane_index, i;
03521     uint8_t kstate[32];
03522 
03523     memset(kstate, MID_STATE, sizeof(kstate));
03524 
03525     put_rac(&s->c, kstate, s->keyframe);
03526     if(s->keyframe || s->always_reset){
03527         reset_contexts(s);
03528         s->last_spatial_decomposition_type=
03529         s->last_qlog=
03530         s->last_qbias=
03531         s->last_mv_scale=
03532         s->last_block_max_depth= 0;
03533         for(plane_index=0; plane_index<2; plane_index++){
03534             Plane *p= &s->plane[plane_index];
03535             p->last_htaps=0;
03536             p->last_diag_mc=0;
03537             memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
03538         }
03539     }
03540     if(s->keyframe){
03541         put_symbol(&s->c, s->header_state, s->version, 0);
03542         put_rac(&s->c, s->header_state, s->always_reset);
03543         put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
03544         put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
03545         put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03546         put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
03547         put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
03548         put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
03549         put_rac(&s->c, s->header_state, s->spatial_scalability);
03550 //        put_rac(&s->c, s->header_state, s->rate_scalability);
03551         put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
03552 
03553         encode_qlogs(s);
03554     }
03555 
03556     if(!s->keyframe){
03557         int update_mc=0;
03558         for(plane_index=0; plane_index<2; plane_index++){
03559             Plane *p= &s->plane[plane_index];
03560             update_mc |= p->last_htaps   != p->htaps;
03561             update_mc |= p->last_diag_mc != p->diag_mc;
03562             update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03563         }
03564         put_rac(&s->c, s->header_state, update_mc);
03565         if(update_mc){
03566             for(plane_index=0; plane_index<2; plane_index++){
03567                 Plane *p= &s->plane[plane_index];
03568                 put_rac(&s->c, s->header_state, p->diag_mc);
03569                 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
03570                 for(i= p->htaps/2; i; i--)
03571                     put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
03572 
03573                 p->last_diag_mc= p->diag_mc;
03574                 p->last_htaps= p->htaps;
03575                 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03576             }
03577         }
03578         if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
03579             put_rac(&s->c, s->header_state, 1);
03580             put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03581             encode_qlogs(s);
03582         }else
03583             put_rac(&s->c, s->header_state, 0);
03584     }
03585 
03586     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
03587     put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
03588     put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
03589     put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
03590     put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
03591 
03592     s->last_spatial_decomposition_type= s->spatial_decomposition_type;
03593     s->last_qlog                      = s->qlog;
03594     s->last_qbias                     = s->qbias;
03595     s->last_mv_scale                  = s->mv_scale;
03596     s->last_block_max_depth           = s->block_max_depth;
03597     s->last_spatial_decomposition_count= s->spatial_decomposition_count;
03598 }
03599 
03600 static void decode_qlogs(SnowContext *s){
03601     int plane_index, level, orientation;
03602 
03603     for(plane_index=0; plane_index<3; plane_index++){
03604         for(level=0; level<s->spatial_decomposition_count; level++){
03605             for(orientation=level ? 1:0; orientation<4; orientation++){
03606                 int q;
03607                 if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
03608                 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
03609                 else                    q= get_symbol(&s->c, s->header_state, 1);
03610                 s->plane[plane_index].band[level][orientation].qlog= q;
03611             }
03612         }
03613     }
03614 }
03615 
03616 static int decode_header(SnowContext *s){
03617     int plane_index;
03618     uint8_t kstate[32];
03619 
03620     memset(kstate, MID_STATE, sizeof(kstate));
03621 
03622     s->keyframe= get_rac(&s->c, kstate);
03623     if(s->keyframe || s->always_reset){
03624         reset_contexts(s);
03625         s->spatial_decomposition_type=
03626         s->qlog=
03627         s->qbias=
03628         s->mv_scale=
03629         s->block_max_depth= 0;
03630     }
03631     if(s->keyframe){
03632         s->version= get_symbol(&s->c, s->header_state, 0);
03633         if(s->version>0){
03634             av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
03635             return -1;
03636         }
03637         s->always_reset= get_rac(&s->c, s->header_state);
03638         s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
03639         s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03640         s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03641         s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
03642         s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
03643         s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
03644         s->spatial_scalability= get_rac(&s->c, s->header_state);
03645 //        s->rate_scalability= get_rac(&s->c, s->header_state);
03646         s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
03647 
03648         decode_qlogs(s);
03649     }
03650 
03651     if(!s->keyframe){
03652         if(get_rac(&s->c, s->header_state)){
03653             for(plane_index=0; plane_index<2; plane_index++){
03654                 int htaps, i, sum=0;
03655                 Plane *p= &s->plane[plane_index];
03656                 p->diag_mc= get_rac(&s->c, s->header_state);
03657                 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
03658                 if((unsigned)htaps > HTAPS_MAX || htaps==0)
03659                     return -1;
03660                 p->htaps= htaps;
03661                 for(i= htaps/2; i; i--){
03662                     p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
03663                     sum += p->hcoeff[i];
03664                 }
03665                 p->hcoeff[0]= 32-sum;
03666             }
03667             s->plane[2].diag_mc= s->plane[1].diag_mc;
03668             s->plane[2].htaps  = s->plane[1].htaps;
03669             memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
03670         }
03671         if(get_rac(&s->c, s->header_state)){
03672             s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03673             decode_qlogs(s);
03674         }
03675     }
03676 
03677     s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
03678     if(s->spatial_decomposition_type > 1){
03679         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
03680         return -1;
03681     }
03682 
03683     s->