00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "avcodec.h"
00022 #include "dsputil.h"
00023 #include "snow.h"
00024
00025 #include "rangecoder.h"
00026
00027 #include "mpegvideo.h"
00028
00029 #undef NDEBUG
00030 #include <assert.h>
00031
/**
 * 256-entry lookup producing one of three levels (-1, 0, +1).
 * Indices 0 and 1 give 0, 2..127 give +1, 128..254 give -1 and 255 gives 0,
 * i.e. entry (256 - i) is the negation of entry i.
 * NOTE(review): the negative outputs sit in the upper half, which suggests the
 * index is a signed value masked to 8 bits -- confirm at the call sites.
 */
static const int8_t quant3[256]={
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
};
/**
 * 256-entry lookup producing one of three levels (-1, 0, +1).
 * Unlike quant3 only index 0 gives 0: 1..127 give +1 and 128..255 give -1.
 * NOTE(review): presumably indexed by a signed value masked to 8 bits --
 * confirm at the call sites.
 */
static const int8_t quant3b[256]={
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
/**
 * 256-entry lookup producing one of three levels (-1, 0, +1).
 * Indices 0 and 1 give 0; from index 2 on, even indices give +1 and odd
 * indices give -1.
 * NOTE(review): the alternating pattern suggests the sign is carried in the
 * lowest bit of the index -- confirm at the call sites.
 */
static const int8_t quant3bA[256]={
    0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
    1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
};
/**
 * 256-entry lookup producing one of five levels (-2..+2).
 * Index 0 gives 0, 1..3 give 1, 4..127 give 2; the upper half mirrors this
 * with negated values (entry 256-i is the negation of entry i, 128 gives -2).
 * NOTE(review): presumably indexed by a signed value masked to 8 bits --
 * confirm at the call sites.
 */
static const int8_t quant5[256]={
    0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
/**
 * 256-entry lookup producing one of seven levels (-3..+3).
 * Index 0 gives 0, 1..2 give 1, 3..39 give 2, 40..127 give 3; the upper half
 * mirrors this with negated values (entry 256-i is the negation of entry i,
 * 128 gives -3).
 * NOTE(review): presumably indexed by a signed value masked to 8 bits --
 * confirm at the call sites.
 */
static const int8_t quant7[256]={
    0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup producing one of nine levels (-4..+4).
 * Index 0 gives 0, 1..2 give 1, 3..6 give 2, 7..20 give 3, 21..127 give 4;
 * the upper half mirrors this with negated values (entry 256-i is the
 * negation of entry i, 128 gives -4).
 * NOTE(review): presumably indexed by a signed value masked to 8 bits --
 * confirm at the call sites.
 */
static const int8_t quant9[256]={
    0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup producing one of eleven levels (-5..+5).
 * Index 0 gives 0, 1 gives 1, 2..4 give 2, 5..11 give 3, 12..34 give 4,
 * 35..127 give 5; the upper half mirrors this with negated values (entry
 * 256-i is the negation of entry i, 128 gives -5).
 * NOTE(review): presumably indexed by a signed value masked to 8 bits --
 * confirm at the call sites.
 */
static const int8_t quant11[256]={
    0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
/**
 * 256-entry lookup producing one of thirteen levels (-6..+6).
 * Index 0 gives 0, 1 gives 1, 2..3 give 2, 4..7 give 3, 8..16 give 4,
 * 17..49 give 5, 50..127 give 6; the upper half mirrors this with negated
 * values (entry 256-i is the negation of entry i, 128 gives -6).
 * NOTE(review): presumably indexed by a signed value masked to 8 bits --
 * confirm at the call sites.
 */
static const int8_t quant13[256]={
    0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
00176
/*
 * Window weight tables for 32x32 and 16x16 blocks ("64*linear" variant).
 *
 * This used to be a three-way preprocessor chain (#if 0 cubic / #elif 1
 * linear / #else cos). Only the linear branch was ever compiled, so the two
 * unreachable variants were dropped; recover them from version control if
 * they are needed for experiments.
 *
 * Both tables are stored row by row. In obmc16 the four weights at (y, x),
 * (y, x+8), (y+8, x) and (y+8, x+8) add up to 256 for every x, y < 8.
 */
static const uint8_t obmc32[1024]={
    0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
    0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
    0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
    0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
    4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
    4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
    4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
    4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
    4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
    4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
    4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
    4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
    8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
    8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
    8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
    8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
    8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
    8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
    8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
    8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
    4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
    4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
    4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
    4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
    4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
    4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
    4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
    4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
    0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
    0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
    0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
    0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
};
static const uint8_t obmc16[256]={
    0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
    4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
    4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
    8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
    8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
    12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
    12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
    16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
    16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
    12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
    12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
    8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
    8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
    4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
    4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
    0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
};
00343
00344
/**
 * Window weight table for 8x8 blocks, stored row by row.
 * The four weights at (y, x), (y, x+4), (y+4, x) and (y+4, x+4) add up to
 * 256 for every x, y < 4.
 */
static const uint8_t obmc8[64]={
    4, 12, 20, 28, 28, 20, 12, 4,
    12, 36, 60, 84, 84, 60, 36, 12,
    20, 60,100,140,140,100, 60, 20,
    28, 84,140,196,196,140, 84, 28,
    28, 84,140,196,196,140, 84, 28,
    20, 60,100,140,140,100, 60, 20,
    12, 36, 60, 84, 84, 60, 36, 12,
    4, 12, 20, 28, 28, 20, 12, 4,
};
00356
00357
/**
 * Window weight table for 4x4 blocks, stored row by row.
 * The four weights at (y, x), (y, x+2), (y+2, x) and (y+2, x+2) add up to
 * 256 for every x, y < 2.
 */
static const uint8_t obmc4[16]={
    16, 48, 48, 16,
    48,144,144, 48,
    48,144,144, 48,
    16, 48, 48, 16,
};
00365
00366 static const uint8_t *obmc_tab[4]={
00367 obmc32, obmc16, obmc8, obmc4
00368 };
00369
00370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
00371
/**
 * Per-block record.
 * NOTE(review): the field meanings below are inferred from the names only;
 * confirm against the code that fills and reads them.
 */
typedef struct BlockNode{
    int16_t mx;         ///< presumably horizontal motion vector component
    int16_t my;         ///< presumably vertical motion vector component
    uint8_t ref;        ///< presumably index of the reference frame used
    uint8_t color[3];   ///< three per-block colour values
    uint8_t type;       ///< presumably a bitmask of the BLOCK_* values below
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level;      ///< presumably the block's level in the block tree -- TODO confirm
}BlockNode;
00384
00385 static const BlockNode null_block= {
00386 .color= {128,128,128},
00387 .mx= 0,
00388 .my= 0,
00389 .ref= 0,
00390 .type= 0,
00391 .level= 0,
00392 };
00393
#define LOG2_MB_SIZE 4                  /* log2 of MB_SIZE */
#define MB_SIZE (1<<LOG2_MB_SIZE)       /* 16 */
#define ENCODER_EXTRA_BITS 4
#define HTAPS_MAX 8                     /* Plane.hcoeff[] holds HTAPS_MAX/2 entries */
00398
/**
 * A coefficient together with an x value.
 * NOTE(review): presumably the column of a non-zero coefficient within a
 * subband line -- confirm against the code that uses it.
 */
typedef struct x_and_coeff{
    int16_t x;
    uint16_t coeff;
} x_and_coeff;
00403
00404 typedef struct SubBand{
00405 int level;
00406 int stride;
00407 int width;
00408 int height;
00409 int qlog;
00410 DWTELEM *buf;
00411 IDWTELEM *ibuf;
00412 int buf_x_offset;
00413 int buf_y_offset;
00414 int stride_line;
00415 x_and_coeff * x_coeff;
00416 struct SubBand *parent;
00417 uint8_t state[ 7 + 512][32];
00418 }SubBand;
00419
00420 typedef struct Plane{
00421 int width;
00422 int height;
00423 SubBand band[MAX_DECOMPOSITIONS][4];
00424
00425 int htaps;
00426 int8_t hcoeff[HTAPS_MAX/2];
00427 int diag_mc;
00428 int fast_mc;
00429
00430 int last_htaps;
00431 int8_t last_hcoeff[HTAPS_MAX/2];
00432 int last_diag_mc;
00433 }Plane;
00434
00435 typedef struct SnowContext{
00436
00437
00438 AVCodecContext *avctx;
00439 RangeCoder c;
00440 DSPContext dsp;
00441 AVFrame new_picture;
00442 AVFrame input_picture;
00443 AVFrame current_picture;
00444 AVFrame last_picture[MAX_REF_FRAMES];
00445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
00446 AVFrame mconly_picture;
00447
00448 uint8_t header_state[32];
00449 uint8_t block_state[128 + 32*128];
00450 int keyframe;
00451 int always_reset;
00452 int version;
00453 int spatial_decomposition_type;
00454 int last_spatial_decomposition_type;
00455 int temporal_decomposition_type;
00456 int spatial_decomposition_count;
00457 int last_spatial_decomposition_count;
00458 int temporal_decomposition_count;
00459 int max_ref_frames;
00460 int ref_frames;
00461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
00462 uint32_t *ref_scores[MAX_REF_FRAMES];
00463 DWTELEM *spatial_dwt_buffer;
00464 IDWTELEM *spatial_idwt_buffer;
00465 int colorspace_type;
00466 int chroma_h_shift;
00467 int chroma_v_shift;
00468 int spatial_scalability;
00469 int qlog;
00470 int last_qlog;
00471 int lambda;
00472 int lambda2;
00473 int pass1_rc;
00474 int mv_scale;
00475 int last_mv_scale;
00476 int qbias;
00477 int last_qbias;
00478 #define QBIAS_SHIFT 3
00479 int b_width;
00480 int b_height;
00481 int block_max_depth;
00482 int last_block_max_depth;
00483 Plane plane[MAX_PLANES];
00484 BlockNode *block;
00485 #define ME_CACHE_SIZE 1024
00486 int me_cache[ME_CACHE_SIZE];
00487 int me_cache_generation;
00488 slice_buffer sb;
00489
00490 MpegEncContext m;
00491 }SnowContext;
00492
00493 typedef struct {
00494 IDWTELEM *b0;
00495 IDWTELEM *b1;
00496 IDWTELEM *b2;
00497 IDWTELEM *b3;
00498 int y;
00499 } dwt_compose_t;
00500
/* Return the buffer of line line_num, taking the fast path when the line is
 * already resident and calling slice_buffer_load_line() otherwise.
 * Both arguments are evaluated more than once: do not pass expressions with
 * side effects. */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
00502
00503
00504 static void iterative_me(SnowContext *s);
00505
00506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
00507 {
00508 int i;
00509
00510 buf->base_buffer = base_buffer;
00511 buf->line_count = line_count;
00512 buf->line_width = line_width;
00513 buf->data_count = max_allocated_lines;
00514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
00515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
00516
00517 for (i = 0; i < max_allocated_lines; i++)
00518 {
00519 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
00520 }
00521
00522 buf->data_stack_top = max_allocated_lines - 1;
00523 }
00524
00525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
00526 {
00527 int offset;
00528 IDWTELEM * buffer;
00529
00530
00531
00532 assert(buf->data_stack_top >= 0);
00533
00534 if (buf->line[line])
00535 return buf->line[line];
00536
00537 offset = buf->line_width * line;
00538 buffer = buf->data_stack[buf->data_stack_top];
00539 buf->data_stack_top--;
00540 buf->line[line] = buffer;
00541
00542
00543
00544 return buffer;
00545 }
00546
00547 static void slice_buffer_release(slice_buffer * buf, int line)
00548 {
00549 int offset;
00550 IDWTELEM * buffer;
00551
00552 assert(line >= 0 && line < buf->line_count);
00553 assert(buf->line[line]);
00554
00555 offset = buf->line_width * line;
00556 buffer = buf->line[line];
00557 buf->data_stack_top++;
00558 buf->data_stack[buf->data_stack_top] = buffer;
00559 buf->line[line] = NULL;
00560
00561
00562 }
00563
00564 static void slice_buffer_flush(slice_buffer * buf)
00565 {
00566 int i;
00567 for (i = 0; i < buf->line_count; i++)
00568 {
00569 if (buf->line[i])
00570 {
00571
00572 slice_buffer_release(buf, i);
00573 }
00574 }
00575 }
00576
00577 static void slice_buffer_destroy(slice_buffer * buf)
00578 {
00579 int i;
00580 slice_buffer_flush(buf);
00581
00582 for (i = buf->data_count - 1; i >= 0; i--)
00583 {
00584 av_freep(&buf->data_stack[i]);
00585 }
00586 av_freep(&buf->data_stack);
00587 av_freep(&buf->line);
00588 }
00589
00590 #ifdef __sgi
00591
00592 #undef qexp
00593 #endif
00594 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
00595 static uint8_t qexp[QROOT];
00596
/**
 * Reflect an index into the range [0, m] by mirroring at both ends
 * (..., 2, 1, 0, 1, 2, ..., m-1, m, m-1, ...).
 *
 * @param v index, possibly outside [0, m]
 * @param m largest valid index
 * @return the mirrored index; 0 when m is 0
 */
static inline int mirror(int v, int m){
    /* With m == 0 the reflection below only flips the sign of v and would
     * never terminate for v != 0; the only valid index is 0. */
    if(!m)
        return 0;

    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
00604
00605 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
00606 int i;
00607
00608 if(v){
00609 const int a= FFABS(v);
00610 const int e= av_log2(a);
00611 #if 1
00612 const int el= FFMIN(e, 10);
00613 put_rac(c, state+0, 0);
00614
00615 for(i=0; i<el; i++){
00616 put_rac(c, state+1+i, 1);
00617 }
00618 for(; i<e; i++){
00619 put_rac(c, state+1+9, 1);
00620 }
00621 put_rac(c, state+1+FFMIN(i,9), 0);
00622
00623 for(i=e-1; i>=el; i--){
00624 put_rac(c, state+22+9, (a>>i)&1);
00625 }
00626 for(; i>=0; i--){
00627 put_rac(c, state+22+i, (a>>i)&1);
00628 }
00629
00630 if(is_signed)
00631 put_rac(c, state+11 + el, v < 0);
00632 #else
00633
00634 put_rac(c, state+0, 0);
00635 if(e<=9){
00636 for(i=0; i<e; i++){
00637 put_rac(c, state+1+i, 1);
00638 }
00639 put_rac(c, state+1+i, 0);
00640
00641 for(i=e-1; i>=0; i--){
00642 put_rac(c, state+22+i, (a>>i)&1);
00643 }
00644
00645 if(is_signed)
00646 put_rac(c, state+11 + e, v < 0);
00647 }else{
00648 for(i=0; i<e; i++){
00649 put_rac(c, state+1+FFMIN(i,9), 1);
00650 }
00651 put_rac(c, state+1+FFMIN(i,9), 0);
00652
00653 for(i=e-1; i>=0; i--){
00654 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1);
00655 }
00656
00657 if(is_signed)
00658 put_rac(c, state+11 + FFMIN(e,10), v < 0);
00659 }
00660 #endif
00661 }else{
00662 put_rac(c, state+0, 1);
00663 }
00664 }
00665
00666 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
00667 if(get_rac(c, state+0))
00668 return 0;
00669 else{
00670 int i, e, a;
00671 e= 0;
00672 while(get_rac(c, state+1 + FFMIN(e,9))){
00673 e++;
00674 }
00675
00676 a= 1;
00677 for(i=e-1; i>=0; i--){
00678 a += a + get_rac(c, state+22 + FFMIN(i,9));
00679 }
00680
00681 if(is_signed && get_rac(c, state+11 + FFMIN(e,10)))
00682 return -a;
00683 else
00684 return a;
00685 }
00686 }
00687
00688 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
00689 int i;
00690 int r= log2>=0 ? 1<<log2 : 1;
00691
00692 assert(v>=0);
00693 assert(log2>=-4);
00694
00695 while(v >= r){
00696 put_rac(c, state+4+log2, 1);
00697 v -= r;
00698 log2++;
00699 if(log2>0) r+=r;
00700 }
00701 put_rac(c, state+4+log2, 0);
00702
00703 for(i=log2-1; i>=0; i--){
00704 put_rac(c, state+31-i, (v>>i)&1);
00705 }
00706 }
00707
00708 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
00709 int i;
00710 int r= log2>=0 ? 1<<log2 : 1;
00711 int v=0;
00712
00713 assert(log2>=-4);
00714
00715 while(get_rac(c, state+4+log2)){
00716 v+= r;
00717 log2++;
00718 if(log2>0) r+=r;
00719 }
00720
00721 for(i=log2-1; i>=0; i--){
00722 v+= get_rac(c, state+31-i)<<i;
00723 }
00724
00725 return v;
00726 }
00727
00728 static av_always_inline void
00729 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00730 int dst_step, int src_step, int ref_step,
00731 int width, int mul, int add, int shift,
00732 int highpass, int inverse){
00733 const int mirror_left= !highpass;
00734 const int mirror_right= (width&1) ^ highpass;
00735 const int w= (width>>1) - 1 + (highpass & width);
00736 int i;
00737
00738 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00739 if(mirror_left){
00740 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00741 dst += dst_step;
00742 src += src_step;
00743 }
00744
00745 for(i=0; i<w; i++){
00746 dst[i*dst_step] =
00747 LIFT(src[i*src_step],
00748 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00749 inverse);
00750 }
00751
00752 if(mirror_right){
00753 dst[w*dst_step] =
00754 LIFT(src[w*src_step],
00755 ((mul*2*ref[w*ref_step]+add)>>shift),
00756 inverse);
00757 }
00758 }
00759
00760 static av_always_inline void
00761 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00762 int dst_step, int src_step, int ref_step,
00763 int width, int mul, int add, int shift,
00764 int highpass, int inverse){
00765 const int mirror_left= !highpass;
00766 const int mirror_right= (width&1) ^ highpass;
00767 const int w= (width>>1) - 1 + (highpass & width);
00768 int i;
00769
00770 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00771 if(mirror_left){
00772 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00773 dst += dst_step;
00774 src += src_step;
00775 }
00776
00777 for(i=0; i<w; i++){
00778 dst[i*dst_step] =
00779 LIFT(src[i*src_step],
00780 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00781 inverse);
00782 }
00783
00784 if(mirror_right){
00785 dst[w*dst_step] =
00786 LIFT(src[w*src_step],
00787 ((mul*2*ref[w*ref_step]+add)>>shift),
00788 inverse);
00789 }
00790 }
00791
00792 #ifndef liftS
00793 static av_always_inline void
00794 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00795 int dst_step, int src_step, int ref_step,
00796 int width, int mul, int add, int shift,
00797 int highpass, int inverse){
00798 const int mirror_left= !highpass;
00799 const int mirror_right= (width&1) ^ highpass;
00800 const int w= (width>>1) - 1 + (highpass & width);
00801 int i;
00802
00803 assert(shift == 4);
00804 #define LIFTS(src, ref, inv) \
00805 ((inv) ? \
00806 (src) + (((ref) + 4*(src))>>shift): \
00807 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00808 if(mirror_left){
00809 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00810 dst += dst_step;
00811 src += src_step;
00812 }
00813
00814 for(i=0; i<w; i++){
00815 dst[i*dst_step] =
00816 LIFTS(src[i*src_step],
00817 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00818 inverse);
00819 }
00820
00821 if(mirror_right){
00822 dst[w*dst_step] =
00823 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00824 }
00825 }
00826 static av_always_inline void
00827 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00828 int dst_step, int src_step, int ref_step,
00829 int width, int mul, int add, int shift,
00830 int highpass, int inverse){
00831 const int mirror_left= !highpass;
00832 const int mirror_right= (width&1) ^ highpass;
00833 const int w= (width>>1) - 1 + (highpass & width);
00834 int i;
00835
00836 assert(shift == 4);
00837 #define LIFTS(src, ref, inv) \
00838 ((inv) ? \
00839 (src) + (((ref) + 4*(src))>>shift): \
00840 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00841 if(mirror_left){
00842 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00843 dst += dst_step;
00844 src += src_step;
00845 }
00846
00847 for(i=0; i<w; i++){
00848 dst[i*dst_step] =
00849 LIFTS(src[i*src_step],
00850 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00851 inverse);
00852 }
00853
00854 if(mirror_right){
00855 dst[w*dst_step] =
00856 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00857 }
00858 }
00859 #endif
00860
00861 static void horizontal_decompose53i(DWTELEM *b, int width){
00862 DWTELEM temp[width];
00863 const int width2= width>>1;
00864 int x;
00865 const int w2= (width+1)>>1;
00866
00867 for(x=0; x<width2; x++){
00868 temp[x ]= b[2*x ];
00869 temp[x+w2]= b[2*x + 1];
00870 }
00871 if(width&1)
00872 temp[x ]= b[2*x ];
00873 #if 0
00874 {
00875 int A1,A2,A3,A4;
00876 A2= temp[1 ];
00877 A4= temp[0 ];
00878 A1= temp[0+width2];
00879 A1 -= (A2 + A4)>>1;
00880 A4 += (A1 + 1)>>1;
00881 b[0+width2] = A1;
00882 b[0 ] = A4;
00883 for(x=1; x+1<width2; x+=2){
00884 A3= temp[x+width2];
00885 A4= temp[x+1 ];
00886 A3 -= (A2 + A4)>>1;
00887 A2 += (A1 + A3 + 2)>>2;
00888 b[x+width2] = A3;
00889 b[x ] = A2;
00890
00891 A1= temp[x+1+width2];
00892 A2= temp[x+2 ];
00893 A1 -= (A2 + A4)>>1;
00894 A4 += (A1 + A3 + 2)>>2;
00895 b[x+1+width2] = A1;
00896 b[x+1 ] = A4;
00897 }
00898 A3= temp[width-1];
00899 A3 -= A2;
00900 A2 += (A1 + A3 + 2)>>2;
00901 b[width -1] = A3;
00902 b[width2-1] = A2;
00903 }
00904 #else
00905 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
00906 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
00907 #endif
00908 }
00909
00910 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00911 int i;
00912
00913 for(i=0; i<width; i++){
00914 b1[i] -= (b0[i] + b2[i])>>1;
00915 }
00916 }
00917
00918 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00919 int i;
00920
00921 for(i=0; i<width; i++){
00922 b1[i] += (b0[i] + b2[i] + 2)>>2;
00923 }
00924 }
00925
00926 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
00927 int y;
00928 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
00929 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
00930
00931 for(y=-2; y<height; y+=2){
00932 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
00933 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
00934
00935 {START_TIMER
00936 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
00937 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
00938 STOP_TIMER("horizontal_decompose53i")}
00939
00940 {START_TIMER
00941 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
00942 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
00943 STOP_TIMER("vertical_decompose53i*")}
00944
00945 b0=b2;
00946 b1=b3;
00947 }
00948 }
00949
00950 static void horizontal_decompose97i(DWTELEM *b, int width){
00951 DWTELEM temp[width];
00952 const int w2= (width+1)>>1;
00953
00954 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
00955 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
00956 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
00957 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
00958 }
00959
00960
00961 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00962 int i;
00963
00964 for(i=0; i<width; i++){
00965 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
00966 }
00967 }
00968
00969 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00970 int i;
00971
00972 for(i=0; i<width; i++){
00973 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
00974 }
00975 }
00976
00977 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00978 int i;
00979
00980 for(i=0; i<width; i++){
00981 #ifdef liftS
00982 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
00983 #else
00984 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
00985 #endif
00986 }
00987 }
00988
00989 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00990 int i;
00991
00992 for(i=0; i<width; i++){
00993 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
00994 }
00995 }
00996
00997 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
00998 int y;
00999 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
01000 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
01001 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
01002 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
01003
01004 for(y=-4; y<height; y+=2){
01005 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
01006 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
01007
01008 {START_TIMER
01009 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
01010 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
01011 if(width>400){
01012 STOP_TIMER("horizontal_decompose97i")
01013 }}
01014
01015 {START_TIMER
01016 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
01017 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
01018 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
01019 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
01020
01021 if(width>400){
01022 STOP_TIMER("vertical_decompose97i")
01023 }}
01024
01025 b0=b2;
01026 b1=b3;
01027 b2=b4;
01028 b3=b5;
01029 }
01030 }
01031
01032 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01033 int level;
01034
01035 for(level=0; level<decomposition_count; level++){
01036 switch(type){
01037 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
01038 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
01039 }
01040 }
01041 }
01042
01043 static void horizontal_compose53i(IDWTELEM *b, int width){
01044 IDWTELEM temp[width];
01045 const int width2= width>>1;
01046 const int w2= (width+1)>>1;
01047 int x;
01048
01049 #if 0
01050 int A1,A2,A3,A4;
01051 A2= temp[1 ];
01052 A4= temp[0 ];
01053 A1= temp[0+width2];
01054 A1 -= (A2 + A4)>>1;
01055 A4 += (A1 + 1)>>1;
01056 b[0+width2] = A1;
01057 b[0 ] = A4;
01058 for(x=1; x+1<width2; x+=2){
01059 A3= temp[x+width2];
01060 A4= temp[x+1 ];
01061 A3 -= (A2 + A4)>>1;
01062 A2 += (A1 + A3 + 2)>>2;
01063 b[x+width2] = A3;
01064 b[x ] = A2;
01065
01066 A1= temp[x+1+width2];
01067 A2= temp[x+2 ];
01068 A1 -= (A2 + A4)>>1;
01069 A4 += (A1 + A3 + 2)>>2;
01070 b[x+1+width2] = A1;
01071 b[x+1 ] = A4;
01072 }
01073 A3= temp[width-1];
01074 A3 -= A2;
01075 A2 += (A1 + A3 + 2)>>2;
01076 b[width -1] = A3;
01077 b[width2-1] = A2;
01078 #else
01079 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
01080 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
01081 #endif
01082 for(x=0; x<width2; x++){
01083 b[2*x ]= temp[x ];
01084 b[2*x + 1]= temp[x+w2];
01085 }
01086 if(width&1)
01087 b[2*x ]= temp[x ];
01088 }
01089
01090 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01091 int i;
01092
01093 for(i=0; i<width; i++){
01094 b1[i] += (b0[i] + b2[i])>>1;
01095 }
01096 }
01097
01098 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01099 int i;
01100
01101 for(i=0; i<width; i++){
01102 b1[i] -= (b0[i] + b2[i] + 2)>>2;
01103 }
01104 }
01105
01106 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
01107 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
01108 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
01109 cs->y = -1;
01110 }
01111
01112 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
01113 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
01114 cs->b1 = buffer + mirror(-1 , height-1)*stride;
01115 cs->y = -1;
01116 }
01117
01118 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
01119 int y= cs->y;
01120
01121 IDWTELEM *b0= cs->b0;
01122 IDWTELEM *b1= cs->b1;
01123 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
01124 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
01125
01126 {START_TIMER
01127 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01128 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01129 STOP_TIMER("vertical_compose53i*")}
01130
01131 {START_TIMER
01132 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01133 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01134 STOP_TIMER("horizontal_compose53i")}
01135
01136 cs->b0 = b2;
01137 cs->b1 = b3;
01138 cs->y += 2;
01139 }
01140
01141 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
01142 int y= cs->y;
01143 IDWTELEM *b0= cs->b0;
01144 IDWTELEM *b1= cs->b1;
01145 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
01146 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
01147
01148 {START_TIMER
01149 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01150 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01151 STOP_TIMER("vertical_compose53i*")}
01152
01153 {START_TIMER
01154 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01155 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01156 STOP_TIMER("horizontal_compose53i")}
01157
01158 cs->b0 = b2;
01159 cs->b1 = b3;
01160 cs->y += 2;
01161 }
01162
01163
01164 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
01165 IDWTELEM temp[width];
01166 const int w2= (width+1)>>1;
01167
01168 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
01169 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
01170 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
01171 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
01172 }
01173
01174 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01175 int i;
01176
01177 for(i=0; i<width; i++){
01178 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01179 }
01180 }
01181
01182 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01183 int i;
01184
01185 for(i=0; i<width; i++){
01186 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
01187 }
01188 }
01189
01190 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01191 int i;
01192
01193 for(i=0; i<width; i++){
01194 #ifdef liftS
01195 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
01196 #else
01197 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
01198 #endif
01199 }
01200 }
01201
01202 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01203 int i;
01204
01205 for(i=0; i<width; i++){
01206 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
01207 }
01208 }
01209
01210 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
01211 int i;
01212
01213 for(i=0; i<width; i++){
01214 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
01215 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
01216 #ifdef liftS
01217 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
01218 #else
01219 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
01220 #endif
01221 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01222 }
01223 }
01224
01225 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
01226 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
01227 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
01228 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
01229 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
01230 cs->y = -3;
01231 }
01232
01233 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
01234 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
01235 cs->b1 = buffer + mirror(-3 , height-1)*stride;
01236 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
01237 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
01238 cs->y = -3;
01239 }
01240
01241 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
01242 int y = cs->y;
01243
01244 IDWTELEM *b0= cs->b0;
01245 IDWTELEM *b1= cs->b1;
01246 IDWTELEM *b2= cs->b2;
01247 IDWTELEM *b3= cs->b3;
01248 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
01249 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
01250
01251 {START_TIMER
01252 if(y>0 && y+4<height){
01253 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
01254 }else{
01255 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01256 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01257 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01258 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01259 }
01260 if(width>400){
01261 STOP_TIMER("vertical_compose97i")}}
01262
01263 {START_TIMER
01264 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
01265 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
01266 if(width>400 && y+0<(unsigned)height){
01267 STOP_TIMER("horizontal_compose97i")}}
01268
01269 cs->b0=b2;
01270 cs->b1=b3;
01271 cs->b2=b4;
01272 cs->b3=b5;
01273 cs->y += 2;
01274 }
01275
01276 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
01277 int y = cs->y;
01278 IDWTELEM *b0= cs->b0;
01279 IDWTELEM *b1= cs->b1;
01280 IDWTELEM *b2= cs->b2;
01281 IDWTELEM *b3= cs->b3;
01282 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
01283 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
01284
01285 {START_TIMER
01286 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01287 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01288 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01289 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01290 if(width>400){
01291 STOP_TIMER("vertical_compose97i")}}
01292
01293 {START_TIMER
01294 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
01295 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
01296 if(width>400 && b0 <= b2){
01297 STOP_TIMER("horizontal_compose97i")}}
01298
01299 cs->b0=b2;
01300 cs->b1=b3;
01301 cs->b2=b4;
01302 cs->b3=b5;
01303 cs->y += 2;
01304 }
01305
01306 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
01307 int level;
01308 for(level=decomposition_count-1; level>=0; level--){
01309 switch(type){
01310 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01311 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01312 }
01313 }
01314 }
01315
01316 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01317 int level;
01318 for(level=decomposition_count-1; level>=0; level--){
01319 switch(type){
01320 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
01321 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
01322 }
01323 }
01324 }
01325
01326 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
01327 const int support = type==1 ? 3 : 5;
01328 int level;
01329 if(type==2) return;
01330
01331 for(level=decomposition_count-1; level>=0; level--){
01332 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01333 switch(type){
01334 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01335 break;
01336 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01337 break;
01338 }
01339 }
01340 }
01341 }
01342
01343 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
01344 const int support = type==1 ? 3 : 5;
01345 int level;
01346 if(type==2) return;
01347
01348 for(level=decomposition_count-1; level>=0; level--){
01349 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01350 switch(type){
01351 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01352 break;
01353 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01354 break;
01355 }
01356 }
01357 }
01358 }
01359
01360 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01361 dwt_compose_t cs[MAX_DECOMPOSITIONS];
01362 int y;
01363 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
01364 for(y=0; y<height; y+=4)
01365 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
01366 }
01367
/**
 * Encodes the coefficients of one subband with the range coder in s->c.
 *
 * Pass 1 scans the band and records, for positions whose neighbours
 * (left, top-left, top, top-right) and parent coefficient are all zero,
 * the lengths of the zero runs between non-zero coefficients.
 * Pass 2 writes the coefficients: positions with a non-zero neighbourhood
 * get an explicit significance bit, the others are covered by the run
 * lengths; every non-zero coefficient is followed by its magnitude and sign.
 *
 * @param s           codec context (range coder, logging context)
 * @param b           subband being coded (dimensions, contexts, parent)
 * @param src         coefficients of this band
 * @param parent      coefficients of the parent band, or NULL
 * @param stride      row distance of src in elements; the parent is
 *                    addressed with twice this stride
 * @param orientation not referenced in this function
 * @return 0 on success, -1 if the output buffer is about to run out
 */
01368 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01369 const int w= b->width;
01370 const int h= b->height;
01371 int x, y;
01372
01373 if(1){
01374 int run=0;
/* NOTE(review): one int per coefficient on the stack (variable-length array). */
01375 int runs[w*h];
01376 int run_index=0;
01377 int max_index;
01378
/* Pass 1: collect zero-run lengths. */
01379 for(y=0; y<h; y++){
01380 for(x=0; x<w; x++){
01381 int v, p=0;
01382 int l=0, lt=0, t=0, rt=0;
01383 v= src[x + y*stride];
01384
/* Neighbours outside the band stay 0. */
01385 if(y){
01386 t= src[x + (y-1)*stride];
01387 if(x){
01388 lt= src[x - 1 + (y-1)*stride];
01389 }
01390 if(x + 1 < w){
01391 rt= src[x + 1 + (y-1)*stride];
01392 }
01393 }
01394 if(x){
01395 l= src[x - 1 + y*stride];
01396
01397
01398
01399
01400 }
/* Parent coefficient at half resolution, if it exists. */
01401 if(parent){
01402 int px= x>>1;
01403 int py= y>>1;
01404 if(px<b->parent->width && py<b->parent->height)
01405 p= parent[px + py*2*stride];
01406 }
/* Only positions with an all-zero context take part in run coding. */
01407 if(!(l|lt|t|rt|p)){
01408 if(v){
01409 runs[run_index++]= run;
01410 run=0;
01411 }else{
01412 run++;
01413 }
01414 }
01415 }
01416 }
/* max_index = number of completed runs; the trailing run is stored after them. */
01417 max_index= run_index;
01418 runs[run_index++]= run;
01419 run_index=0;
01420 run= runs[run_index++];
01421
/* Write the run count, then the first run if at least one run was completed. */
01422 put_symbol2(&s->c, b->state[30], max_index, 0);
01423 if(run_index <= max_index)
01424 put_symbol2(&s->c, b->state[1], run, 3);
01425
/* Pass 2: emit the coefficients. */
01426 for(y=0; y<h; y++){
/* Give up before starting a row when fewer than w*40 bytes remain in the output. */
01427 if(s->c.bytestream_end - s->c.bytestream < w*40){
01428 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
01429 return -1;
01430 }
01431 for(x=0; x<w; x++){
01432 int v, p=0;
01433 int l=0, lt=0, t=0, rt=0;
01434 v= src[x + y*stride];
01435
01436 if(y){
01437 t= src[x + (y-1)*stride];
01438 if(x){
01439 lt= src[x - 1 + (y-1)*stride];
01440 }
01441 if(x + 1 < w){
01442 rt= src[x + 1 + (y-1)*stride];
01443 }
01444 }
01445 if(x){
01446 l= src[x - 1 + y*stride];
01447
01448
01449
01450
01451 }
01452 if(parent){
01453 int px= x>>1;
01454 int py= y>>1;
01455 if(px<b->parent->width && py<b->parent->height)
01456 p= parent[px + py*2*stride];
01457 }
/* Non-zero context: code an explicit "coefficient is non-zero" bit. */
01458 if(l|lt|t|rt|p){
01459 int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01460
01461 put_rac(&s->c, &b->state[0][context], !!v);
01462 }else{
/* Zero context: the run lengths from pass 1 tell where the non-zero values are. */
01463 if(!run){
01464 run= runs[run_index++];
01465
01466 if(run_index <= max_index)
01467 put_symbol2(&s->c, b->state[1], run, 3);
01468 assert(v);
01469 }else{
01470 run--;
01471 assert(!v);
01472 }
01473 }
/* Non-zero coefficient: magnitude minus one, then the sign with a context from l and t. */
01474 if(v){
01475 int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01476 int l2= 2*FFABS(l) + (l<0);
01477 int t2= 2*FFABS(t) + (t<0);
01478
01479 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
01480 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
01481 }
01482 }
01483 }
01484 }
01485 return 0;
01486 }
01487
01488 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01489
01490
01491 return encode_subband_c0run(s, b, src, parent, stride, orientation);
01492
01493 }
01494
/**
 * Decodes the coefficients of subband b from the range coder in s->c into
 * the sparse list b->x_coeff.
 *
 * Each non-zero coefficient is stored as an (x, coeff) entry where
 * coeff = 2*magnitude + sign bit. Every row is terminated by an entry with
 * x = w+1, and one more such entry is appended after the last row.
 * The previous row and the parent band are read back from their own
 * x_coeff lists to form the coding contexts.
 *
 * @param s           codec context holding the range coder
 * @param b           subband to decode into
 * @param parent      parent subband, or NULL
 * @param orientation not referenced in this function
 */
01495 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
01496 const int w= b->width;
01497 const int h= b->height;
01498 int x,y;
01499
01500 if(1){
01501 int run, runs;
01502 x_and_coeff *xc= b->x_coeff;
01503 x_and_coeff *prev_xc= NULL;
01504 x_and_coeff *prev2_xc= xc;
01505 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
01506 x_and_coeff *prev_parent_xc= parent_xc;
01507
/* Number of coded runs, then the first run; INT_MAX stands for "no further run". */
01508 runs= get_symbol2(&s->c, b->state[30], 0);
01509 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01510 else run= INT_MAX;
01511
01512 for(y=0; y<h; y++){
01513 int v=0;
01514 int lt=0, t=0, rt=0;
01515
/* Seed rt with the previous row's coefficient at x=0 so it becomes t in the first column. */
01516 if(y && prev_xc->x == 0){
01517 rt= prev_xc->coeff;
01518 }
01519 for(x=0; x<w; x++){
01520 int p=0;
/* The value decoded in the previous column is the left neighbour. */
01521 const int l= v;
01522
01523 lt= t; t= rt;
01524
/* Advance through the previous row's list to find the top-right neighbour. */
01525 if(y){
01526 if(prev_xc->x <= x)
01527 prev_xc++;
01528 if(prev_xc->x == x + 1)
01529 rt= prev_xc->coeff;
01530 else
01531 rt=0;
01532 }
/* Parent coefficient at column x>>1, if present in the parent's list. */
01533 if(parent_xc){
01534 if(x>>1 > parent_xc->x){
01535 parent_xc++;
01536 }
01537 if(x>>1 == parent_xc->x){
01538 p= parent_xc->coeff;
01539 }
01540 }
01541 if(l|lt|t|rt|p){
/* Neighbours are in the 2*magnitude+sign form: >>1 yields the magnitude, t&~1 yields 2*magnitude. */
01542 int context= av_log2(3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
01543
01544 v=get_rac(&s->c, &b->state[0][context]);
01545 if(v){
01546 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
01547 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
01548
01549 xc->x=x;
01550 (xc++)->coeff= v;
01551 }
01552 }else{
/* All-zero context: a finished run means a non-zero coefficient sits here. */
01553 if(!run){
01554 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01555 else run= INT_MAX;
01556 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
01557 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
01558
01559 xc->x=x;
01560 (xc++)->coeff= v;
01561 }else{
01562 int max_run;
01563 run--;
01564 v=0;
01565
/* Skip ahead within the run, bounded by the next entry of the previous row
 * (or the row end on the first row) and by the next parent entry. */
01566 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
01567 else max_run= FFMIN(run, w-x-1);
01568 if(parent_xc)
01569 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
01570 x+= max_run;
01571 run-= max_run;
01572 }
01573 }
01574 }
/* Terminate this row, then make it the "previous row" for the next iteration. */
01575 (xc++)->x= w+1;
01576 prev_xc= prev2_xc;
01577 prev2_xc= xc;
01578
/* After an odd row, move to the next parent row (past its terminator);
 * after an even row, rewind to the start of the same parent row. */
01579 if(parent_xc){
01580 if(y&1){
01581 while(parent_xc->x != parent->width+1)
01582 parent_xc++;
01583 parent_xc++;
01584 prev_parent_xc= parent_xc;
01585 }else{
01586 parent_xc= prev_parent_xc;
01587 }
01588 }
01589 }
01590
/* Extra terminator after the last row. */
01591 (xc++)->x= w+1;
01592 }
01593 }
01594
01595 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
01596 const int w= b->width;
01597 int y;
01598 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
01599 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
01600 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
01601 int new_index = 0;
01602
01603 START_TIMER
01604
01605 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
01606 qadd= 0;
01607 qmul= 1<<QEXPSHIFT;
01608 }
01609
01610
01611 if (start_y != 0)
01612 new_index = save_state[0];
01613
01614
01615 for(y=start_y; y<h; y++){
01616 int x = 0;
01617 int v;
01618 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
01619 memset(line, 0, b->width*sizeof(IDWTELEM));
01620 v = b->x_coeff[new_index].coeff;
01621 x = b->x_coeff[new_index++].x;
01622 while(x < w)
01623 {
01624 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
01625 register int u= -(v&1);
01626 line[x] = (t^u) - u;
01627
01628 v = b->x_coeff[new_index].coeff;
01629 x = b->x_coeff[new_index++].x;
01630 }
01631 }
01632 if(w > 200 && start_y != 0){
01633 STOP_TIMER("decode_subband")
01634 }
01635
01636
01637 save_state[0] = new_index;
01638
01639 return;
01640 }
01641
01642 static void reset_contexts(SnowContext *s){
01643 int plane_index, level, orientation;
01644
01645 for(plane_index=0; plane_index<3; plane_index++){
01646 for(level=0; level<MAX_DECOMPOSITIONS; level++){
01647 for(orientation=level ? 1:0; orientation<4; orientation++){
01648 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
01649 }
01650 }
01651 }
01652 memset(s->header_state, MID_STATE, sizeof(s->header_state));
01653 memset(s->block_state, MID_STATE, sizeof(s->block_state));
01654 }
01655
01656 static int alloc_blocks(SnowContext *s){
01657 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
01658 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
01659
01660 s->b_width = w;
01661 s->b_height= h;
01662
01663 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
01664 return 0;
01665 }
01666
01667 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
01668 uint8_t *bytestream= d->bytestream;
01669 uint8_t *bytestream_start= d->bytestream_start;
01670 *d= *s;
01671 d->bytestream= bytestream;
01672 d->bytestream_start= bytestream_start;
01673 }
01674
01675
/**
 * Sums the pixels of a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance between rows, in bytes
 * @param w         side length of the square
 * @return sum of all w*w pixel values (0 when w <= 0)
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
01690
01691
01692 static int pix_norm1(uint8_t * pix, int line_size, int w)
01693 {
01694 int s, i, j;
01695 uint32_t *sq = ff_squareTbl + 256;
01696
01697 s = 0;
01698 for (i = 0; i < w; i++) {
01699 for (j = 0; j < w; j ++) {
01700 s += sq[pix[0]];
01701 pix ++;
01702 }
01703 pix += line_size - w;
01704 }
01705 return s;
01706 }
01707
01708 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
01709 const int w= s->b_width << s->block_max_depth;
01710 const int rem_depth= s->block_max_depth - level;
01711 const int index= (x + y*w) << rem_depth;
01712 const int block_w= 1<<rem_depth;
01713 BlockNode block;
01714 int i,j;
01715
01716 block.color[0]= l;
01717 block.color[1]= cb;
01718 block.color[2]= cr;
01719 block.mx= mx;
01720 block.my= my;
01721 block.ref= ref;
01722 block.type= type;
01723 block.level= level;
01724
01725 for(j=0; j<block_w; j++){
01726 for(i=0; i<block_w; i++){
01727 s->block[index + i + j*w]= block;
01728 }
01729 }
01730 }
01731
01732 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
01733 const int offset[3]= {
01734 y*c-> stride + x,
01735 ((y*c->uvstride + x)>>1),
01736 ((y*c->uvstride + x)>>1),
01737 };
01738 int i;
01739 for(i=0; i<3; i++){
01740 c->src[0][i]= src [i];
01741 c->ref[0][i]= ref [i] + offset[i];
01742 }
01743 assert(!ref_index);
01744 }
01745
01746 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
01747 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
01748 if(s->ref_frames == 1){
01749 *mx = mid_pred(left->mx, top->mx, tr->mx);
01750 *my = mid_pred(left->my, top->my, tr->my);
01751 }else{
01752 const int *scale = scale_mv_ref[ref];
01753 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
01754 (top ->mx * scale[top ->ref] + 128) >>8,
01755 (tr ->mx * scale[tr ->ref] + 128) >>8);
01756 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
01757 (top ->my * scale[top ->ref] + 128) >>8,
01758 (tr ->my * scale[tr ->ref] + 128) >>8);
01759 }
01760 }
01761
01762
/* Shorthand names for rows of an array named P that must be in scope where
 * they are used (encode_q_branch() declares "int P[10][2]").
 * NOTE(review): the names suggest left / top / top-right / median motion
 * vector predictors — confirm against the code that fills P. */
01763 #define P_LEFT P[1]
01764 #define P_TOP P[2]
01765 #define P_TOPRIGHT P[3]
01766 #define P_MEDIAN P[4]
01767 #define P_MV1 P[9]
01768 #define FLAG_QPEL 1 //must be 1
01769
/**
 * Decide how to code the block at (x, y) on quad-tree level `level` and
 * write the decision to the range coder s->c.
 *
 * Three candidates are scored: an inter block (motion search over all
 * reference frames), an intra block (one mean colour per plane) and, when
 * level is not the maximum depth, a split into four children coded
 * recursively. The inter and intra candidates are coded into scratch
 * buffers with copies of the coder and of the context states; the winner
 * is copied back over the stream position saved at entry.
 *
 * @return the score of the chosen candidate; 0 on keyframes, where nothing
 *         is coded and the block is set to intra with the left colour.
 */
01770 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* Scratch output and context-state copies for the trial inter (p_) and intra (i_) codings. */
01771 uint8_t p_buffer[1024];
01772 uint8_t i_buffer[1024];
01773 uint8_t p_state[sizeof(s->block_state)];
01774 uint8_t i_state[sizeof(s->block_state)];
01775 RangeCoder pc, ic;
/* Stream position at entry; the winning trial's bytes are copied back here. */
01776 uint8_t *pbbak= s->c.bytestream;
01777 uint8_t *pbbak_start= s->c.bytestream_start;
01778 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
01779 const int w= s->b_width << s->block_max_depth;
01780 const int h= s->b_height << s->block_max_depth;
01781 const int rem_depth= s->block_max_depth - level;
01782 const int index= (x + y*w) << rem_depth;
01783 const int block_w= 1<<(LOG2_MB_SIZE - level);
01784 int trx= (x+1)<<rem_depth;
01785 int try= (y+1)<<rem_depth;
/* Neighbouring blocks; neighbours outside the block grid fall back to null_block (or to left / tl). */
01786 const BlockNode *left = x ? &s->block[index-1] : &null_block;
01787 const BlockNode *top = y ? &s->block[index-w] : &null_block;
01788 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
01789 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
01790 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
01791 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
/* Colour prediction is simply the left neighbour's colour. */
01792 int pl = left->color[0];
01793 int pcb= left->color[1];
01794 int pcr= left->color[2];
01795 int pmx, pmy;
01796 int mx=0, my=0;
01797 int l,cr,cb;
01798 const int stride= s->current_picture.linesize[0];
01799 const int uvstride= s->current_picture.linesize[1];
/* Pointers to this block inside the three input planes (chroma offsets are halved). */
01800 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
01801 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
01802 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
01803 int P[10][2];
01804 int16_t last_mv[3][2];
01805 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL);
01806 const int shift= 1+qpel;
01807 MotionEstContext *c= &s->m.me;
/* Context selectors for the range coder, derived from the neighbours. */
01808 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
01809 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
01810 int my_context= av_log2(2*FFABS(left->my - top->my));
01811 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
01812 int ref, best_ref, ref_score, ref_mx, ref_my;
01813
01814 assert(sizeof(s->block_state) >= 256);
/* Keyframes: no search and nothing written; the block becomes intra with the predicted colour. */
01815 if(s->keyframe){
01816 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
01817 return 0;
01818 }
01819
01820
01821
/* Predictor candidates for the motion search, taken from the neighbours' vectors. */
01822 P_LEFT[0]= left->mx;
01823 P_LEFT[1]= left->my;
01824 P_TOP [0]= top->mx;
01825 P_TOP [1]= top->my;
01826 P_TOPRIGHT[0]= tr->mx;
01827 P_TOPRIGHT[1]= tr->my;
01828
/* Vectors currently stored for this block and for its right / bottom neighbours. */
01829 last_mv[0][0]= s->block[index].mx;
01830 last_mv[0][1]= s->block[index].my;
01831 last_mv[1][0]= right->mx;
01832 last_mv[1][1]= right->my;
01833 last_mv[2][0]= bottom->mx;
01834 last_mv[2][1]= bottom->my;
01835
01836 s->m.mb_stride=2;
01837 s->m.mb_x=
01838 s->m.mb_y= 0;
01839 c->skip= 0;
01840
01841 assert(c-> stride == stride);
01842 assert(c->uvstride == uvstride);
01843
01844 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
01845 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
01846 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
01847 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
01848
/* Search window relative to this block: the picture area extended by 16-2 on each side. */
01849 c->xmin = - x*block_w - 16+2;
01850 c->ymin = - y*block_w - 16+2;
01851 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01852 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01853
/* Clamp the predictors to the search window (window limits are scaled by `shift` to predictor units). */
01854 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
01855 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
01856 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
01857 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
01858 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
01859 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
01860 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
01861
01862 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
01863 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
01864
/* First block row: predict from the left vector only; otherwise use the median. */
01865 if (!y) {
01866 c->pred_x= P_LEFT[0];
01867 c->pred_y= P_LEFT[1];
01868 } else {
01869 c->pred_x = P_MEDIAN[0];
01870 c->pred_y = P_MEDIAN[1];
01871 }
01872
/* Motion search against every reference frame; keep the lowest-scoring one. */
01873 score= INT_MAX;
01874 best_ref= 0;
01875 for(ref=0; ref<s->ref_frames; ref++){
01876 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
01877
01878 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, 0, last_mv,
01879 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
01880
01881 assert(ref_mx >= c->xmin);
01882 assert(ref_mx <= c->xmax);
01883 assert(ref_my >= c->ymin);
01884 assert(ref_my <= c->ymax);
01885
01886 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
01887 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* Penalise higher reference indices. */
01888 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* Record the per-reference result when the cache arrays are present. */
01889 if(s->ref_mvs[ref]){
01890 s->ref_mvs[ref][index][0]= ref_mx;
01891 s->ref_mvs[ref][index][1]= ref_my;
01892 s->ref_scores[ref][index]= ref_score;
01893 }
01894 if(score > ref_score){
01895 score= ref_score;
01896 best_ref= ref;
01897 mx= ref_mx;
01898 my= ref_my;
01899 }
01900 }
01901
01902
01903
/* Trial inter coding: run a copy of the coder into p_buffer and add the rate cost to score. */
01904 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
01905 pc= s->c;
01906 pc.bytestream_start=
01907 pc.bytestream= p_buffer;
01908 memcpy(p_state, s->block_state, sizeof(s->block_state));
01909
/* The "not split" flag is only coded when a split would be possible. */
01910 if(level!=s->block_max_depth)
01911 put_rac(&pc, &p_state[4 + s_context], 1);
01912 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
01913 if(s->ref_frames > 1)
01914 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
01915 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
01916 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
01917 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
01918 p_len= pc.bytestream - pc.bytestream_start;
01919 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
01920
/* Intra candidate: l is the rounded luma mean of the block.
 * NOTE(review): iscore looks like the sum of squared deviations from l,
 * assuming pix_norm1() returns the sum of squared samples - confirm. */
01921 block_s= block_w*block_w;
01922 sum = pix_sum(current_data[0], stride, block_w);
01923 l= (sum + block_s/2)/block_s;
01924 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
01925
/* Rounded chroma means over the half-size chroma blocks. */
01926 block_s= block_w*block_w>>2;
01927 sum = pix_sum(current_data[1], uvstride, block_w>>1);
01928 cb= (sum + block_s/2)/block_s;
01929
01930 sum = pix_sum(current_data[2], uvstride, block_w>>1);
01931 cr= (sum + block_s/2)/block_s;
01932
01933
/* Trial intra coding into i_buffer: colour differences against the left neighbour. */
01934 ic= s->c;
01935 ic.bytestream_start=
01936 ic.bytestream= i_buffer;
01937 memcpy(i_state, s->block_state, sizeof(s->block_state));
01938 if(level!=s->block_max_depth)
01939 put_rac(&ic, &i_state[4 + s_context], 1);
01940 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
01941 put_symbol(&ic, &i_state[32], l-pl , 1);
01942 put_symbol(&ic, &i_state[64], cb-pcb, 1);
01943 put_symbol(&ic, &i_state[96], cr-pcr, 1);
01944 i_len= ic.bytestream - ic.bytestream_start;
01945 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
01946
01947
01948 assert(iscore < 255*255*256 + s->lambda2*10);
01949 assert(iscore >= 0);
01950 assert(l>=0 && l<=255);
01951 assert(pl>=0 && pl<=255);
01952
/* Top-level blocks also feed the scene-change score of the motion-estimation context. */
01953 if(level==0){
01954 int varc= iscore >> 8;
01955 int vard= score >> 8;
01956 if (vard <= 64 || vard < varc)
01957 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
01958 else
01959 c->scene_change_score+= s->m.qscale;
01960 }
01961
/* Split candidate: written directly to s->c; kept only if it beats both other candidates. */
01962 if(level!=s->block_max_depth){
01963 put_rac(&s->c, &s->block_state[4 + s_context], 0);
01964 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
01965 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
01966 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
01967 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
01968 score2+= s->lambda2>>FF_LAMBDA_SHIFT;
01969
01970 if(score2 < score && score2 < iscore)
01971 return score2;
01972 }
01973
/* Otherwise discard whatever the split wrote: copy the winning trial's bytes over the
 * saved stream position and adopt that trial's coder and context states. */
01974 if(iscore < score){
01975 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
01976 memcpy(pbbak, i_buffer, i_len);
01977 s->c= ic;
01978 s->c.bytestream_start= pbbak_start;
01979 s->c.bytestream= pbbak + i_len;
/* Intra blocks still store the predicted vector. */
01980 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
01981 memcpy(s->block_state, i_state, sizeof(s->block_state));
01982 return iscore;
01983 }else{
01984 memcpy(pbbak, p_buffer, p_len);
01985 s->c= pc;
01986 s->c.bytestream_start= pbbak_start;
01987 s->c.bytestream= pbbak + p_len;
/* Inter blocks store the predicted (left) colour. */
01988 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
01989 memcpy(s->block_state, p_state, sizeof(s->block_state));
01990 return score;
01991 }
01992 }
01993
01994 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
01995 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
01996 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
01997 }else{
01998 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
01999 }
02000 }
02001
02002 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
02003 const int w= s->b_width << s->block_max_depth;
02004 const int rem_depth= s->block_max_depth - level;
02005 const int index= (x + y*w) << rem_depth;
02006 int trx= (x+1)<<rem_depth;
02007 BlockNode *b= &s->block[index];
02008 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02009 const BlockNode *top = y ? &s->block[index-w] : &null_block;
02010 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
02011 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
02012 int pl = left->color[0];
02013 int pcb= left->color[1];
02014 int pcr= left->color[2];
02015 int pmx, pmy;
02016 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02017 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
02018 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
02019 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02020
02021 if(s->keyframe){
02022 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
02023 return;
02024 }
02025
02026 if(level!=s->block_max_depth){
02027 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
02028 put_rac(&s->c, &s->block_state[4 + s_context], 1);
02029 }else{
02030 put_rac(&s->c, &s->block_state[4 + s_context], 0);
02031 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
02032 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
02033 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
02034 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
02035 return;
02036 }
02037 }
02038 if(b->type & BLOCK_INTRA){
02039 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
02040 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
02041 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
02042 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
02043 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
02044 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
02045 }else{
02046 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
02047 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
02048 if(s->ref_frames > 1)
02049 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
02050 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
02051 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
02052 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
02053 }
02054 }
02055
02056 static void decode_q_branch(SnowContext *s, int level, int x, int y){
02057 const int w= s->b_width << s->block_max_depth;
02058 const int rem_depth= s->block_max_depth - level;
02059 const int index= (x + y*w) << rem_depth;
02060 int trx= (x+1)<<rem_depth;
02061 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02062 const BlockNode *top = y ? &s->block[index-w] : &null_block;
02063 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
02064 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
02065 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02066
02067 if(s->keyframe){
02068 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
02069 return;
02070 }
02071
02072 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
02073 int type, mx, my;
02074 int l = left->color[0];
02075 int cb= left->color[1];
02076 int cr= left->color[2];
02077 int ref = 0;
02078 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02079 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
02080 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
02081
02082 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
02083
02084 if(type){
02085 pred_mv(s, &mx, &my, 0, left, top, tr);
02086 l += get_symbol(&s->c, &s->block_state[32], 1);
02087 cb+= get_symbol(&s->c, &s->block_state[64], 1);
02088 cr+= get_symbol(&s->c, &s->block_state[96], 1);
02089 }else{
02090 if(s->ref_frames > 1)
02091 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
02092 pred_mv(s, &mx, &my, ref, left, top, tr);
02093 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
02094 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
02095 }
02096 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
02097 }else{
02098 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
02099 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
02100 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
02101 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
02102 }
02103 }
02104
02105 static void encode_blocks(SnowContext *s, int search){
02106 int x, y;
02107 int w= s->b_width;
02108 int h= s->b_height;
02109
02110 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
02111 iterative_me(s);
02112
02113 for(y=0; y<h; y++){
02114 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){
02115 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
02116 return;
02117 }
02118 for(x=0; x<w; x++){
02119 if(s->avctx->me_method == ME_ITER || !search)
02120 encode_q_branch2(s, 0, x, y);
02121 else
02122 encode_q_branch (s, 0, x, y);
02123 }
02124 }
02125 }
02126
02127 static void decode_blocks(SnowContext *s){
02128 int x, y;
02129 int w= s->b_width;
02130 int h= s->b_height;
02131
02132 for(y=0; y<h; y++){
02133 for(x=0; x<w; x++){
02134 decode_q_branch(s, 0, x, y);
02135 }
02136 }
02137 }
02138
/**
 * Sub-pel interpolation of one b_w x b_h block.
 *
 * Reads from src, writes b_h rows of b_w samples to dst (row pitch `stride`
 * for both). dx and dy are the fractional offsets and must both be below 16
 * (asserted). The routine builds up to three half-pel planes (horizontal,
 * vertical, and vertical-of-horizontal) in local buffers and then blends
 * two or four planes into dst.
 *
 * When p is NULL or p->fast_mc is set the fixed taps (20, -5, 1) are used,
 * otherwise the four coefficients in p->hcoeff. When p is given and
 * p->diag_mc is 0, all planes are computed and the four-plane blend is used.
 * The tmp parameter is not used by this function.
 */
02139 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* Weight of the first plane (out of 8) in the two-plane blend, indexed by (dx&7) + 8*(dy&7). */
02140 const static uint8_t weight[64]={
02141 8,7,6,5,4,3,2,1,
02142 7,7,0,0,0,0,0,1,
02143 6,0,6,0,0,0,2,0,
02144 5,0,0,5,0,3,0,0,
02145 4,0,0,0,4,0,0,0,
02146 3,0,0,5,0,3,0,0,
02147 2,0,6,0,0,0,2,0,
02148 1,7,0,0,0,0,0,1,
02149 };
02150
/* Indexed by dx + 16*dy: high nibble and low nibble are the two hpel[] plane indices to blend.
 * NOTE(review): 0xcc entries presumably mark positions that always take the four-plane path - confirm. */
02151 const static uint8_t brane[256]={
02152 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
02153 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
02154 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
02155 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
02156 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
02157 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
02158 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
02159 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
02160 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
02161 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
02162 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
02163 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
02164 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
02165 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
02166 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
02167 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
02168 };
02169
/* Per hpel[] plane index: bitmask of the filter passes that plane requires
 * (bits 1|4: horizontal pass, bit 2: vertical pass, bit 4: vertical-of-horizontal pass; 15: everything). */
02170 const static uint8_t needs[16]={
02171 0,1,0,0,
02172 2,4,2,0,
02173 0,1,0,0,
02174 15
02175 };
02176
02177 int x, y, b, r, l;
/* tmpIt: unclipped horizontal filter output, 64 values per row; tmp2t: the three clipped half-pel planes. */
02178 int16_t tmpIt [64*(32+HTAPS_MAX)];
02179 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
02180 int16_t *tmpI= tmpIt;
02181 uint8_t *tmp2= tmp2t[0];
02182 uint8_t *hpel[11];
02183 START_TIMER
02184 assert(dx<16 && dy<16);
02185 r= brane[dx + 16*dy]&15;
02186 l= brane[dx + 16*dy]>>4;
02187
02188 b= needs[l] | needs[r];
/* Without diagonal MC every plane is built and the four-plane blend below is used. */
02189 if(p && !p->diag_mc)
02190 b= 15;
02191
/* Pass 1: horizontal filter over b_h+HTAPS_MAX-1 rows, into tmp2t[0] (clipped) and tmpIt (unclipped). */
02192 if(b&5){
02193 for(y=0; y < b_h+HTAPS_MAX-1; y++){
02194 for(x=0; x < b_w; x++){
02195 int a_1=src[x + HTAPS_MAX/2-4];
02196 int a0= src[x + HTAPS_MAX/2-3];
02197 int a1= src[x + HTAPS_MAX/2-2];
02198 int a2= src[x + HTAPS_MAX/2-1];
02199 int a3= src[x + HTAPS_MAX/2+0];
02200 int a4= src[x + HTAPS_MAX/2+1];
02201 int a5= src[x + HTAPS_MAX/2+2];
02202 int a6= src[x + HTAPS_MAX/2+3];
02203 int am=0;
02204 if(!p || p->fast_mc){
02205 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
02206 tmpI[x]= am;
02207 am= (am+16)>>5;
02208 }else{
02209 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
02210 tmpI[x]= am;
02211 am= (am+32)>>6;
02212 }
02213
/* Clip to 0..255: negative values become 0, values above 255 become 255 once stored as uint8_t. */
02214 if(am&(~255)) am= ~(am>>31);
02215 tmp2[x]= am;
02216 }
02217 tmpI+= 64;
02218 tmp2+= stride;
02219 src += stride;
02220 }
/* Rewind src to the first row. */
02221 src -= stride*y;
02222 }
02223 src += HTAPS_MAX/2 - 1;
02224 tmp2= tmp2t[1];
02225
/* Pass 2: vertical filter on the source samples, b_w+1 columns wide, into tmp2t[1]. */
02226 if(b&2){
02227 for(y=0; y < b_h; y++){
02228 for(x=0; x < b_w+1; x++){
02229 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
02230 int a0= src[x + (HTAPS_MAX/2-3)*stride];
02231 int a1= src[x + (HTAPS_MAX/2-2)*stride];
02232 int a2= src[x + (HTAPS_MAX/2-1)*stride];
02233 int a3= src[x + (HTAPS_MAX/2+0)*stride];
02234 int a4= src[x + (HTAPS_MAX/2+1)*stride];
02235 int a5= src[x + (HTAPS_MAX/2+2)*stride];
02236 int a6= src[x + (HTAPS_MAX/2+3)*stride];
02237 int am=0;
02238 if(!p || p->fast_mc)
02239 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
02240 else
02241 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
02242
02243 if(am&(~255)) am= ~(am>>31);
02244 tmp2[x]= am;
02245 }
02246 src += stride;
02247 tmp2+= stride;
02248 }
02249 src -= stride*y;
02250 }
02251 src += stride*(HTAPS_MAX/2 - 1);
02252 tmp2= tmp2t[2];
02253 tmpI= tmpIt;
/* Pass 3: vertical filter over the unclipped horizontal results, into tmp2t[2];
 * the larger rounding shift removes the scale of both passes. */
02254 if(b&4){
02255 for(y=0; y < b_h; y++){
02256 for(x=0; x < b_w; x++){
02257 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
02258 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
02259 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
02260 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
02261 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
02262 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
02263 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
02264 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
02265 int am=0;
02266 if(!p || p->fast_mc)
02267 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
02268 else
02269 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
02270 if(am&(~255)) am= ~(am>>31);
02271 tmp2[x]= am;
02272 }
02273 tmpI+= 64;
02274 tmp2+= stride;
02275 }
02276 }
02277
/* Plane table laid out as a 3x3 grid with row pitch 4 (slots 3 and 7 are never set):
 * even rows/columns are source samples, odd ones the filtered half-pel planes. */
02278 hpel[ 0]= src;
02279 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
02280 hpel[ 2]= src + 1;
02281
02282 hpel[ 4]= tmp2t[1];
02283 hpel[ 5]= tmp2t[2];
02284 hpel[ 6]= tmp2t[1] + 1;
02285
02286 hpel[ 8]= src + stride;
02287 hpel[ 9]= hpel[1] + stride;
02288 hpel[10]= hpel[8] + 1;
02289
/* Four-plane path: bilinear blend of the four planes surrounding the position, weights sum to 64. */
02290 if(b==15){
02291 uint8_t *src1= hpel[dx/8 + dy/8*4 ];
02292 uint8_t *src2= hpel[dx/8 + dy/8*4+1];
02293 uint8_t *src3= hpel[dx/8 + dy/8*4+4];
02294 uint8_t *src4= hpel[dx/8 + dy/8*4+5];
02295 dx&=7;
02296 dy&=7;
02297 for(y=0; y < b_h; y++){
02298 for(x=0; x < b_w; x++){
02299 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
02300 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
02301 }
02302 src1+=stride;
02303 src2+=stride;
02304 src3+=stride;
02305 src4+=stride;
02306 dst +=stride;
02307 }
/* Two-plane path: blend planes l and r with weights a and 8-a. */
02308 }else{
02309 uint8_t *src1= hpel[l];
02310 uint8_t *src2= hpel[r];
02311 int a= weight[((dx&7) + (8*(dy&7)))];
/* This b shadows the outer pass mask; here it is the second plane's weight. */
02312 int b= 8-a;
02313 for(y=0; y < b_h; y++){
02314 for(x=0; x < b_w; x++){
02315 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
02316 }
02317 src1+=stride;
02318 src2+=stride;
02319 dst +=stride;
02320 }
02321 }
02322 STOP_TIMER("mc_block")
02323 }
02324
/* mca(dx, dy, b_w) defines mc_block_hpel<dx><dy><b_w>(dst, src, stride, h):
 * a wrapper that runs mc_block() with p == NULL on a square b_w x b_w block
 * at the fixed fractional offset (dx, dy). h must equal b_w (asserted). The
 * source pointer is moved back by HTAPS_MAX/2-1 columns and rows before the
 * call. The local tmp buffer is handed to mc_block() as its tmp argument. */
02325 #define mca(dx,dy,b_w)\
02326 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
02327 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
02328 assert(h==b_w);\
02329 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
02330 }
02331
/* Instantiations: the four offset combinations of 0 and 8, for 16x16 and 8x8 blocks. */
02332 mca( 0, 0,16)
02333 mca( 8, 0,16)
02334 mca( 0, 8,16)
02335 mca( 8, 8,16)
02336 mca( 0, 0,8)
02337 mca( 8, 0,8)
02338 mca( 0, 8,8)
02339 mca( 8, 8,8)
02340
02341 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
02342 if(block->type & BLOCK_INTRA){
02343 int x, y;
02344 const int color = block->color[plane_index];
02345 const int color4= color*0x01010101;
02346 if(b_w==32){
02347 for(y=0; y < b_h; y++){
02348 *(uint32_t*)&dst[0 + y*stride]= color4;
02349 *(uint32_t*)&dst[4 + y*stride]= color4;
02350 *(uint32_t*)&dst[8 + y*stride]= color4;
02351 *(uint32_t*)&dst[12+ y*stride]= color4;
02352 *(uint32_t*)&dst[16+ y*stride]= color4;
02353 *(uint32_t*)&dst[20+ y*stride]= color4;
02354 *(uint32_t*)&dst[24+ y*stride]= color4;
02355 *(uint32_t*)&dst[28+ y*stride]= color4;
02356 }
02357 }else if(b_w==16){
02358 for(y=0; y < b_h; y++){
02359 *(uint32_t*)&dst[0 + y*stride]= color4;
02360 *(uint32_t*)&dst[4 + y*stride]= color4;
02361 *(uint32_t*)&dst[8 + y*stride]= color4;
02362 *(uint32_t*)&dst[12+ y*stride]= color4;
02363 }
02364 }else if(b_w==8){
02365 for(y=0; y < b_h; y++){
02366 *(uint32_t*)&dst[0 + y*stride]= color4;
02367 *(uint32_t*)&dst[4 + y*stride]= color4;
02368 }
02369 }else if(b_w==4){
02370 for(y=0; y < b_h; y++){
02371 *(uint32_t*)&dst[0 + y*stride]= color4;
02372 }
02373 }else{
02374 for(y=0; y < b_h; y++){
02375 for(x=0; x < b_w; x++){
02376 dst[x + y*stride]= color;
02377 }
02378 }
02379 }
02380 }else{
02381 uint8_t *src= s->last_picture[block->ref].data[plane_index];
02382 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
02383 int mx= block->mx*scale;
02384 int my= block->my*scale;
02385 const int dx= mx&15;
02386 const int dy= my&15;
02387 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
02388 sx += (mx>>4) - (HTAPS_MAX/2-1);
02389 sy += (my>>4) - (HTAPS_MAX/2-1);
02390 src += sx + sy*stride;
02391 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
02392 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
02393 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
02394 src= tmp + MB_SIZE;
02395 }
02396
02397
02398 assert(b_w>1 && b_h>1);
02399 assert(tab_index>=0 && tab_index<4 || b_w==32);
02400 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
02401 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
02402 else if(b_w==32){
02403 int y;
02404 for(y=0; y<b_h; y+=16){
02405 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
02406 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
02407 }
02408 }else if(b_w==b_h)
02409 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
02410 else if(b_w==2*b_h){
02411 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
02412 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
02413 }else{
02414 assert(2*b_w==b_h);
02415 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
02416 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
02417 }
02418 }
02419 }
02420
02421 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
02422 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
02423 int y, x;
02424 IDWTELEM * dst;
02425 for(y=0; y<b_h; y++){
02426
02427 const uint8_t *obmc1= obmc + y*obmc_stride;
02428 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02429 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02430 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02431 dst = slice_buffer_get_line(sb, src_y + y);
02432 for(x=0; x<b_w; x++){
02433 int v= obmc1[x] * block[3][x + y*src_stride]
02434 +obmc2[x] * block[2][x + y*src_stride]
02435 +obmc3[x] * block[1][x + y*src_stride]
02436 +obmc4[x] * block[0][x + y*src_stride];
02437
02438 v <<= 8 - LOG2_OBMC_MAX;
02439 if(FRAC_BITS != 8){
02440 v >>= 8 - FRAC_BITS;
02441 }
02442 if(add){
02443 v += dst[x + src_x];
02444 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02445 if(v&(~255)) v= ~(v>>31);
02446 dst8[x + y*src_stride] = v;
02447 }else{
02448 dst[x + src_x] -= v;
02449 }
02450 }
02451 }
02452 }
02453
02454
02455 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
02456 const int b_width = s->b_width << s->block_max_depth;
02457 const int b_height= s->b_height << s->block_max_depth;
02458 const int b_stride= b_width;
02459 BlockNode *lt= &s->block[b_x + b_y*b_stride];
02460 BlockNode *rt= lt+1;
02461 BlockNode *lb= lt+b_stride;
02462 BlockNode *rb= lb+1;
02463 uint8_t *block[4];
02464 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
02465 uint8_t tmp[src_stride*7*MB_SIZE];
02466 uint8_t *ptmp;
02467 int x,y;
02468
02469 if(b_x<0){
02470 lt= rt;
02471 lb= rb;
02472 }else if(b_x + 1 >= b_width){
02473 rt= lt;
02474 rb= lb;
02475 }
02476 if(b_y<0){
02477 lt= lb;
02478 rt= rb;
02479 }else if(b_y + 1 >= b_height){
02480 lb= lt;
02481 rb= rt;
02482 }
02483
02484 if(src_x<0){
02485 obmc -= src_x;
02486 b_w += src_x;
02487 if(!sliced && !offset_dst)
02488 dst -= src_x;
02489 src_x=0;
02490 }else if(src_x + b_w > w){
02491 b_w = w - src_x;
02492 }
02493 if(src_y<0){
02494 obmc -= src_y*obmc_stride;
02495 b_h += src_y;
02496 if(!sliced && !offset_dst)
02497 dst -= src_y*dst_stride;
02498 src_y=0;
02499 }else if(src_y + b_h> h){
02500 b_h = h - src_y;
02501 }
02502
02503 if(b_w<=0 || b_h<=0) return;
02504
02505 assert(src_stride > 2*MB_SIZE + 5);
02506 if(!sliced && offset_dst)
02507 dst += src_x + src_y*dst_stride;
02508 dst8+= src_x + src_y*src_stride;
02509
02510
02511 ptmp= tmp + 3*tmp_step;
02512 block[0]= ptmp;
02513 ptmp+=tmp_step;
02514 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
02515
02516 if(same_block(lt, rt)){
02517 block[1]= block[0];
02518 }else{
02519 block[1]= ptmp;
02520 ptmp+=tmp_step;
02521 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
02522 }
02523
02524 if(same_block(lt, lb)){
02525 block[2]= block[0];
02526 }else if(same_block(rt, lb)){
02527 block[2]= block[1];
02528 }else{
02529 block[2]= ptmp;
02530 ptmp+=tmp_step;
02531 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
02532 }
02533
02534 if(same_block(lt, rb) ){
02535 block[3]= block[0];
02536 }else if(same_block(rt, rb)){
02537 block[3]= block[1];
02538 }else if(same_block(lb, rb)){
02539 block[3]= block[2];
02540 }else{
02541 block[3]= ptmp;
02542 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
02543 }
02544 #if 0
02545 for(y=0; y<b_h; y++){
02546 for(x=0; x<b_w; x++){
02547 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
02548 if(add) dst[x + y*dst_stride] += v;
02549 else dst[x + y*dst_stride] -= v;
02550 }
02551 }
02552 for(y=0; y<b_h; y++){
02553 uint8_t *obmc2= obmc + (obmc_stride>>1);
02554 for(x=0; x<b_w; x++){
02555 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
02556 if(add) dst[x + y*dst_stride] += v;
02557 else dst[x + y*dst_stride] -= v;
02558 }
02559 }
02560 for(y=0; y<b_h; y++){
02561 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02562 for(x=0; x<b_w; x++){
02563 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
02564 if(add) dst[x + y*dst_stride] += v;
02565 else dst[x + y*dst_stride] -= v;
02566 }
02567 }
02568 for(y=0; y<b_h; y++){
02569 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02570 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02571 for(x=0; x<b_w; x++){
02572 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
02573 if(add) dst[x + y*dst_stride] += v;
02574 else dst[x + y*dst_stride] -= v;
02575 }
02576 }
02577 #else
02578 if(sliced){
02579 START_TIMER
02580
02581 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
02582 STOP_TIMER("inner_add_yblock")
02583 }else
02584 for(y=0; y<b_h; y++){
02585
02586 const uint8_t *obmc1= obmc + y*obmc_stride;
02587 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02588 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02589 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02590 for(x=0; x<b_w; x++){
02591 int v= obmc1[x] * block[3][x + y*src_stride]
02592 +obmc2[x] * block[2][x + y*src_stride]
02593 +obmc3[x] * block[1][x + y*src_stride]
02594 +obmc4[x] * block[0][x + y*src_stride];
02595
02596 v <<= 8 - LOG2_OBMC_MAX;
02597 if(FRAC_BITS != 8){
02598 v >>= 8 - FRAC_BITS;
02599 }
02600 if(add){
02601 v += dst[x + y*dst_stride];
02602 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02603 if(v&(~255)) v= ~(v>>31);
02604 dst8[x + y*src_stride] = v;
02605 }else{
02606 dst[x + y*dst_stride] -= v;
02607 }
02608 }
02609 }
02610 #endif
02611 }
02612
02613 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
02614 Plane *p= &s->plane[plane_index];
02615 const int mb_w= s->b_width << s->block_max_depth;
02616 const int mb_h= s->b_height << s->block_max_depth;
02617 int x, y, mb_x;
02618 int block_size = MB_SIZE >> s->block_max_depth;
02619 int block_w = plane_index ? block_size/2 : block_size;
02620 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02621 int obmc_stride= plane_index ? block_size : 2*block_size;
02622 int ref_stride= s->current_picture.linesize[plane_index];
02623 uint8_t *dst8= s->current_picture.data[plane_index];
02624 int w= p->width;
02625 int h= p->height;
02626 START_TIMER
02627
02628 if(s->keyframe || (s->avctx->debug&512)){
02629 if(mb_y==mb_h)
02630 return;
02631
02632 if(add){
02633 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
02634 {
02635
02636 IDWTELEM * line = sb->line[y];
02637 for(x=0; x<w; x++)
02638 {
02639
02640 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02641 v >>= FRAC_BITS;
02642 if(v&(~255)) v= ~(v>>31);
02643 dst8[x + y*ref_stride]= v;
02644 }
02645 }
02646 }else{
02647 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
02648 {
02649
02650 IDWTELEM * line = sb->line[y];
02651 for(x=0; x<w; x++)
02652 {
02653 line[x] -= 128 << FRAC_BITS;
02654
02655 }
02656 }
02657 }
02658
02659 return;
02660 }
02661
02662 for(mb_x=0; mb_x<=mb_w; mb_x++){
02663 START_TIMER
02664
02665 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
02666 block_w*mb_x - block_w/2,
02667 block_w*mb_y - block_w/2,
02668 block_w, block_w,
02669 w, h,
02670 w, ref_stride, obmc_stride,
02671 mb_x - 1, mb_y - 1,
02672 add, 0, plane_index);
02673
02674 STOP_TIMER("add_yblock")
02675 }
02676
02677 STOP_TIMER("predict_slice")
02678 }
02679
02680 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
02681 Plane *p= &s->plane[plane_index];
02682 const int mb_w= s->b_width << s->block_max_depth;
02683 const int mb_h= s->b_height << s->block_max_depth;
02684 int x, y, mb_x;
02685 int block_size = MB_SIZE >> s->block_max_depth;
02686 int block_w = plane_index ? block_size/2 : block_size;
02687 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02688 const int obmc_stride= plane_index ? block_size : 2*block_size;
02689 int ref_stride= s->current_picture.linesize[plane_index];
02690 uint8_t *dst8= s->current_picture.data[plane_index];
02691 int w= p->width;
02692 int h= p->height;
02693 START_TIMER
02694
02695 if(s->keyframe || (s->avctx->debug&512)){
02696 if(mb_y==mb_h)
02697 return;
02698
02699 if(add){
02700 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02701 for(x=0; x<w; x++){
02702 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02703 v >>= FRAC_BITS;
02704 if(v&(~255)) v= ~(v>>31);
02705 dst8[x + y*ref_stride]= v;
02706 }
02707 }
02708 }else{
02709 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02710 for(x=0; x<w; x++){
02711 buf[x + y*w]-= 128<<FRAC_BITS;
02712 }
02713 }
02714 }
02715
02716 return;
02717 }
02718
02719 for(mb_x=0; mb_x<=mb_w; mb_x++){
02720 START_TIMER
02721
02722 add_yblock(s, 0, NULL, buf, dst8, obmc,
02723 block_w*mb_x - block_w/2,
02724 block_w*mb_y - block_w/2,
02725 block_w, block_w,
02726 w, h,
02727 w, ref_stride, obmc_stride,
02728 mb_x - 1, mb_y - 1,
02729 add, 1, plane_index);
02730
02731 STOP_TIMER("add_yblock")
02732 }
02733
02734 STOP_TIMER("predict_slice")
02735 }
02736
02737 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
02738 const int mb_h= s->b_height << s->block_max_depth;
02739 int mb_y;
02740 for(mb_y=0; mb_y<=mb_h; mb_y++)
02741 predict_slice(s, buf, plane_index, add, mb_y);
02742 }
02743
02744 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
02745 int i, x2, y2;
02746 Plane *p= &s->plane[plane_index];
02747 const int block_size = MB_SIZE >> s->block_max_depth;
02748 const int block_w = plane_index ? block_size/2 : block_size;
02749 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02750 const int obmc_stride= plane_index ? block_size : 2*block_size;
02751 const int ref_stride= s->current_picture.linesize[plane_index];
02752 uint8_t *src= s-> input_picture.data[plane_index];
02753 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02754 const int b_stride = s->b_width << s->block_max_depth;
02755 const int w= p->width;
02756 const int h= p->height;
02757 int index= mb_x + mb_y*b_stride;
02758 BlockNode *b= &s->block[index];
02759 BlockNode backup= *b;
02760 int ab=0;
02761 int aa=0;
02762
02763 b->type|= BLOCK_INTRA;
02764 b->color[plane_index]= 0;
02765 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
02766
02767 for(i=0; i<4; i++){
02768 int mb_x2= mb_x + (i &1) - 1;
02769 int mb_y2= mb_y + (i>>1) - 1;
02770 int x= block_w*mb_x2 + block_w/2;
02771 int y= block_w*mb_y2 + block_w/2;
02772
02773 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
02774 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
02775
02776 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
02777 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
02778 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
02779 int obmc_v= obmc[index];
02780 int d;
02781 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
02782 if(x<0) obmc_v += obmc[index + block_w];
02783 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
02784 if(x+block_w>w) obmc_v += obmc[index - block_w];
02785
02786
02787 d = -dst[index] + (1<<(FRAC_BITS-1));
02788 dst[index] = d;
02789 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
02790 aa += obmc_v * obmc_v;
02791 }
02792 }
02793 }
02794 *b= backup;
02795
02796 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255);
02797 }
02798
02799 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
02800 const int b_stride = s->b_width << s->block_max_depth;
02801 const int b_height = s->b_height<< s->block_max_depth;
02802 int index= x + y*b_stride;
02803 const BlockNode *b = &s->block[index];
02804 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02805 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
02806 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
02807 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
02808 int dmx, dmy;
02809
02810
02811
02812 if(x<0 || x>=b_stride || y>=b_height)
02813 return 0;
02814
02815
02816
02817
02818
02819
02820
02821
02822
02823 if(b->type & BLOCK_INTRA){
02824 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
02825 + av_log2(2*FFABS(left->color[1] - b->color[1]))
02826 + av_log2(2*FFABS(left->color[2] - b->color[2])));
02827 }else{
02828 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
02829 dmx-= b->mx;
02830 dmy-= b->my;
02831 return 2*(1 + av_log2(2*FFABS(dmx))
02832 + av_log2(2*FFABS(dmy))
02833 + av_log2(2*b->ref));
02834 }
02835 }
02836
02837 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
02838 Plane *p= &s->plane[plane_index];
02839 const int block_size = MB_SIZE >> s->block_max_depth;
02840 const int block_w = plane_index ? block_size/2 : block_size;
02841 const int obmc_stride= plane_index ? block_size : 2*block_size;
02842 const int ref_stride= s->current_picture.linesize[plane_index];
02843 uint8_t *dst= s->current_picture.data[plane_index];
02844 uint8_t *src= s-> input_picture.data[plane_index];
02845 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02846 uint8_t cur[ref_stride*2*MB_SIZE];
02847 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
02848 const int b_stride = s->b_width << s->block_max_depth;
02849 const int b_height = s->b_height<< s->block_max_depth;
02850 const int w= p->width;
02851 const int h= p->height;
02852 int distortion;
02853 int rate= 0;
02854 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02855 int sx= block_w*mb_x - block_w/2;
02856 int sy= block_w*mb_y - block_w/2;
02857 int x0= FFMAX(0,-sx);
02858 int y0= FFMAX(0,-sy);
02859 int x1= FFMIN(block_w*2, w-sx);
02860 int y1= FFMIN(block_w*2, h-sy);
02861 int i,x,y;
02862
02863 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
02864
02865 for(y=y0; y<y1; y++){
02866 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
02867 const IDWTELEM *pred1 = pred + y*obmc_stride;
02868 uint8_t *cur1 = cur + y*ref_stride;
02869 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
02870 for(x=x0; x<x1; x++){
02871 #if FRAC_BITS >= LOG2_OBMC_MAX
02872 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
02873 #else
02874 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
02875 #endif
02876 v = (v + pred1[x]) >> FRAC_BITS;
02877 if(v&(~255)) v= ~(v>>31);
02878 dst1[x] = v;
02879 }
02880 }
02881
02882
02883 if(LOG2_OBMC_MAX == 8
02884 && (mb_x == 0 || mb_x == b_stride-1)
02885 && (mb_y == 0 || mb_y == b_height-1)){
02886 if(mb_x == 0)
02887 x1 = block_w;
02888 else
02889 x0 = block_w;
02890 if(mb_y == 0)
02891 y1 = block_w;
02892 else
02893 y0 = block_w;
02894 for(y=y0; y<y1; y++)
02895 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
02896 }
02897
02898 if(block_w==16){
02899
02900
02901
02902
02903
02904
02905 if(s->avctx->me_cmp == FF_CMP_W97)
02906 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02907 else if(s->avctx->me_cmp == FF_CMP_W53)
02908 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02909 else{
02910 distortion = 0;
02911 for(i=0; i<4; i++){
02912 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
02913 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
02914 }
02915 }
02916 }else{
02917 assert(block_w==8);
02918 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
02919 }
02920
02921 if(plane_index==0){
02922 for(i=0; i<4; i++){
02923
02924
02925
02926
02927 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
02928 }
02929 if(mb_x == b_stride-2)
02930 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
02931 }
02932 return distortion + rate*penalty_factor;
02933 }
02934
02935 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
02936 int i, y2;
02937 Plane *p= &s->plane[plane_index];
02938 const int block_size = MB_SIZE >> s->block_max_depth;
02939 const int block_w = plane_index ? block_size/2 : block_size;
02940 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02941 const int obmc_stride= plane_index ? block_size : 2*block_size;
02942 const int ref_stride= s->current_picture.linesize[plane_index];
02943 uint8_t *dst= s->current_picture.data[plane_index];
02944 uint8_t *src= s-> input_picture.data[plane_index];
02945 static const IDWTELEM zero_dst[4096];
02946 const int b_stride = s->b_width << s->block_max_depth;
02947 const int w= p->width;
02948 const int h= p->height;
02949 int distortion= 0;
02950 int rate= 0;
02951 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02952
02953 for(i=0; i<9; i++){
02954 int mb_x2= mb_x + (i%3) - 1;
02955 int mb_y2= mb_y + (i/3) - 1;
02956 int x= block_w*mb_x2 + block_w/2;
02957 int y= block_w*mb_y2 + block_w/2;
02958
02959 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
02960 x, y, block_w, block_w, w, h, 0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
02961
02962
02963 for(y2= y; y2<0; y2++)
02964 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02965 for(y2= h; y2<y+block_w; y2++)
02966 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02967 if(x<0){
02968 for(y2= y; y2<y+block_w; y2++)
02969 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
02970 }
02971 if(x+block_w > w){
02972 for(y2= y; y2<y+block_w; y2++)
02973 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
02974 }
02975
02976 assert(block_w== 8 || block_w==16);
02977 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
02978 }
02979
02980 if(plane_index==0){
02981 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
02982 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
02983
02984
02985
02986
02987
02988
02989 if(merged)
02990 rate = get_block_bits(s, mb_x, mb_y, 2);
02991 for(i=merged?4:0; i<9; i++){
02992 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
02993 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
02994 }
02995 }
02996 return distortion + rate*penalty_factor;
02997 }
02998
02999 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
03000 const int b_stride= s->b_width << s->block_max_depth;
03001 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
03002 BlockNode backup= *block;
03003 int rd, index, value;
03004
03005 assert(mb_x>=0 && mb_y>=0);
03006 assert(mb_x<b_stride);
03007
03008 if(intra){
03009 block->color[0] = p[0];
03010 block->color[1] = p[1];
03011 block->color[2] = p[2];
03012 block->type |= BLOCK_INTRA;
03013 }else{
03014 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
03015 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
03016 if(s->me_cache[index] == value)
03017 return 0;
03018 s->me_cache[index]= value;
03019
03020 block->mx= p[0];
03021 block->my= p[1];
03022 block->type &= ~BLOCK_INTRA;
03023 }
03024
03025 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
03026
03027
03028 if(rd < *best_rd){
03029 *best_rd= rd;
03030 return 1;
03031 }else{
03032 *block= backup;
03033 return 0;
03034 }
03035 }
03036
03037
03038 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
03039 int p[2] = {p0, p1};
03040 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
03041 }
03042
03043 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
03044 const int b_stride= s->b_width << s->block_max_depth;
03045 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
03046 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
03047 int rd, index, value;
03048
03049 assert(mb_x>=0 && mb_y>=0);
03050 assert(mb_x<b_stride);
03051 assert(((mb_x|mb_y)&1) == 0);
03052
03053 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
03054 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
03055 if(s->me_cache[index] == value)
03056 return 0;
03057 s->me_cache[index]= value;
03058
03059 block->mx= p0;
03060 block->my= p1;
03061 block->ref= ref;
03062 block->type &= ~BLOCK_INTRA;
03063 block[1]= block[b_stride]= block[b_stride+1]= *block;
03064
03065 rd= get_4block_rd(s, mb_x, mb_y, 0);
03066
03067
03068 if(rd < *best_rd){
03069 *best_rd= rd;
03070 return 1;
03071 }else{
03072 block[0]= backup[0];
03073 block[1]= backup[1];
03074 block[b_stride]= backup[2];
03075 block[b_stride+1]= backup[3];
03076 return 0;
03077 }
03078 }
03079
03080 static void iterative_me(SnowContext *s){
03081 int pass, mb_x, mb_y;
03082 const int b_width = s->b_width << s->block_max_depth;
03083 const int b_height= s->b_height << s->block_max_depth;
03084 const int b_stride= b_width;
03085 int color[3];
03086
03087 {
03088 RangeCoder r = s->c;
03089 uint8_t state[sizeof(s->block_state)];
03090 memcpy(state, s->block_state, sizeof(s->block_state));
03091 for(mb_y= 0; mb_y<s->b_height; mb_y++)
03092 for(mb_x= 0; mb_x<s->b_width; mb_x++)
03093 encode_q_branch(s, 0, mb_x, mb_y);
03094 s->c = r;
03095 memcpy(s->block_state, state, sizeof(s->block_state));
03096 }
03097
03098 for(pass=0; pass<25; pass++){
03099 int change= 0;
03100
03101 for(mb_y= 0; mb_y<b_height; mb_y++){
03102 for(mb_x= 0; mb_x<b_width; mb_x++){
03103 int dia_change, i, j, ref;
03104 int best_rd= INT_MAX, ref_rd;
03105 BlockNode backup, ref_b;
03106 const int index= mb_x + mb_y * b_stride;
03107 BlockNode *block= &s->block[index];
03108 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
03109 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
03110 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
03111 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
03112 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
03113 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
03114 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
03115 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
03116 const int b_w= (MB_SIZE >> s->block_max_depth);
03117 uint8_t obmc_edged[b_w*2][b_w*2];
03118
03119 if(pass && (block->type & BLOCK_OPT))
03120 continue;
03121 block->type |= BLOCK_OPT;
03122
03123 backup= *block;
03124
03125 if(!s->me_cache_generation)
03126 memset(s->me_cache, 0, sizeof(s->me_cache));
03127 s->me_cache_generation += 1<<22;
03128
03129
03130 {
03131 int x, y;
03132 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
03133 if(mb_x==0)
03134 for(y=0; y<b_w*2; y++)
03135 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
03136 if(mb_x==b_stride-1)
03137 for(y=0; y<b_w*2; y++)
03138 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
03139 if(mb_y==0){
03140 for(x=0; x<b_w*2; x++)
03141 obmc_edged[0][x] += obmc_edged[b_w-1][x];
03142 for(y=1; y<b_w; y++)
03143 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
03144 }
03145 if(mb_y==b_height-1){
03146 for(x=0; x<b_w*2; x++)
03147 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
03148 for(y=b_w; y<b_w*2-1; y++)
03149 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
03150 }
03151 }
03152
03153
03154 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
03155 {
03156 uint8_t *src= s-> input_picture.data[0];
03157 uint8_t *dst= s->current_picture.data[0];
03158 const int stride= s->current_picture.linesize[0];
03159 const int block_w= MB_SIZE >> s->block_max_depth;
03160 const int sx= block_w*mb_x - block_w/2;
03161 const int sy= block_w*mb_y - block_w/2;
03162 const int w= s->plane[0].width;
03163 const int h= s->plane[0].height;
03164 int y;
03165
03166 for(y=sy; y<0; y++)
03167 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03168 for(y=h; y<sy+block_w*2; y++)
03169 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03170 if(sx<0){
03171 for(y=sy; y<sy+block_w*2; y++)
03172 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
03173 }
03174 if(sx+block_w*2 > w){
03175 for(y=sy; y<sy+block_w*2; y++)
03176 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
03177 }
03178 }
03179
03180
03181 for(i=0; i<3; i++)
03182 color[i]= get_dc(s, mb_x, mb_y, i);
03183
03184
03185 if(pass > 0 && (block->type&BLOCK_INTRA)){
03186 int color0[3]= {block->color[0], block->color[1], block->color[2]};
03187 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
03188 }else
03189 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
03190
03191 ref_b= *block;
03192 ref_rd= best_rd;
03193 for(ref=0; ref < s->ref_frames; ref++){
03194 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
03195 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2)
03196 continue;
03197 block->ref= ref;
03198 best_rd= INT_MAX;
03199
03200 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
03201 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
03202 if(tb)
03203 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
03204 if(lb)
03205 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
03206 if(rb)
03207 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
03208 if(bb)
03209 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
03210
03211
03212
03213 do{
03214 dia_change=0;
03215 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
03216 for(j=0; j<i; j++){
03217 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03218 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03219 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03220 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03221 }
03222 }
03223 }while(dia_change);
03224
03225 do{
03226 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
03227 dia_change=0;
03228 for(i=0; i<8; i++)
03229 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
03230 }while(dia_change);
03231
03232
03233 mvr[0][0]= block->mx;
03234 mvr[0][1]= block->my;
03235 if(ref_rd > best_rd){
03236 ref_rd= best_rd;
03237 ref_b= *block;
03238 }
03239 }
03240 best_rd= ref_rd;
03241 *block= ref_b;
03242 #if 1
03243 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
03244
03245 #endif
03246 if(!same_block(block, &backup)){
03247 if(tb ) tb ->type &= ~BLOCK_OPT;
03248 if(lb ) lb ->type &= ~BLOCK_OPT;
03249 if(rb ) rb ->type &= ~BLOCK_OPT;
03250 if(bb ) bb ->type &= ~BLOCK_OPT;
03251 if(tlb) tlb->type &= ~BLOCK_OPT;
03252 if(trb) trb->type &= ~BLOCK_OPT;
03253 if(blb) blb->type &= ~BLOCK_OPT;
03254 if(brb) brb->type &= ~BLOCK_OPT;
03255 change ++;
03256 }
03257 }
03258 }
03259 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
03260 if(!change)
03261 break;
03262 }
03263
03264 if(s->block_max_depth == 1){
03265 int change= 0;
03266 for(mb_y= 0; mb_y<b_height; mb_y+=2){
03267 for(mb_x= 0; mb_x<b_width; mb_x+=2){
03268 int i;
03269 int best_rd, init_rd;
03270 const int index= mb_x + mb_y * b_stride;
03271 BlockNode *b[4];
03272
03273 b[0]= &s->block[index];
03274 b[1]= b[0]+1;
03275 b[2]= b[0]+b_stride;
03276 b[3]= b[2]+1;
03277 if(same_block(b[0], b[1]) &&
03278 same_block(b[0], b[2]) &&
03279 same_block(b[0], b[3]))
03280 continue;
03281
03282 if(!s->me_cache_generation)
03283 memset(s->me_cache, 0, sizeof(s->me_cache));
03284 s->me_cache_generation += 1<<22;
03285
03286 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
03287
03288
03289 check_4block_inter(s, mb_x, mb_y,
03290 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
03291 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
03292
03293 for(i=0; i<4; i++)
03294 if(!(b[i]->type&BLOCK_INTRA))
03295 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
03296
03297 if(init_rd != best_rd)
03298 change++;
03299 }
03300 }
03301 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
03302 }
03303 }
03304
03305 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
03306 const int level= b->level;
03307 const int w= b->width;
03308 const int h= b->height;
03309 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03310 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
03311 int x,y, thres1, thres2;
03312
03313
03314 if(s->qlog == LOSSLESS_QLOG){
03315 for(y=0; y<h; y++)
03316 for(x=0; x<w; x++)
03317 dst[x + y*stride]= src[x + y*stride];
03318 return;
03319 }
03320
03321 bias= bias ? 0 : (3*qmul)>>3;
03322 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
03323 thres2= 2*thres1;
03324
03325 if(!bias){
03326 for(y=0; y<h; y++){
03327 for(x=0; x<w; x++){
03328 int i= src[x + y*stride];
03329
03330 if((unsigned)(i+thres1) > thres2){
03331 if(i>=0){
03332 i<<= QEXPSHIFT;
03333 i/= qmul;
03334 dst[x + y*stride]= i;
03335 }else{
03336 i= -i;
03337 i<<= QEXPSHIFT;
03338 i/= qmul;
03339 dst[x + y*stride]= -i;
03340 }
03341 }else
03342 dst[x + y*stride]= 0;
03343 }
03344 }
03345 }else{
03346 for(y=0; y<h; y++){
03347 for(x=0; x<w; x++){
03348 int i= src[x + y*stride];
03349
03350 if((unsigned)(i+thres1) > thres2){
03351 if(i>=0){
03352 i<<= QEXPSHIFT;
03353 i= (i + bias) / qmul;
03354 dst[x + y*stride]= i;
03355 }else{
03356 i= -i;
03357 i<<= QEXPSHIFT;
03358 i= (i + bias) / qmul;
03359 dst[x + y*stride]= -i;
03360 }
03361 }else
03362 dst[x + y*stride]= 0;
03363 }
03364 }
03365 }
03366 if(level+1 == s->spatial_decomposition_count){
03367
03368 }
03369 }
03370
03371 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
03372 const int w= b->width;
03373 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03374 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03375 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03376 int x,y;
03377 START_TIMER
03378
03379 if(s->qlog == LOSSLESS_QLOG) return;
03380
03381 for(y=start_y; y<end_y; y++){
03382
03383 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03384 for(x=0; x<w; x++){
03385 int i= line[x];
03386 if(i<0){
03387 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT));
03388 }else if(i>0){
03389 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
03390 }
03391 }
03392 }
03393 if(w > 200 ){
03394 STOP_TIMER("dquant")
03395 }
03396 }
03397
03398 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
03399 const int w= b->width;
03400 const int h= b->height;
03401 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03402 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03403 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03404 int x,y;
03405 START_TIMER
03406
03407 if(s->qlog == LOSSLESS_QLOG) return;
03408
03409 for(y=0; y<h; y++){
03410 for(x=0; x<w; x++){
03411 int i= src[x + y*stride];
03412 if(i<0){
03413 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT));
03414 }else if(i>0){
03415 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
03416 }
03417 }
03418 }
03419 if(w > 200 ){
03420 STOP_TIMER("dquant")
03421 }
03422 }
03423
03424 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03425 const int w= b->width;
03426 const int h= b->height;
03427 int x,y;
03428
03429 for(y=h-1; y>=0; y--){
03430 for(x=w-1; x>=0; x--){
03431 int i= x + y*stride;
03432
03433 if(x){
03434 if(use_median){
03435 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03436 else src[i] -= src[i - 1];
03437 }else{
03438 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03439 else src[i] -= src[i - 1];
03440 }
03441 }else{
03442 if(y) src[i] -= src[i - stride];
03443 }
03444 }
03445 }
03446 }
03447
03448 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
03449 const int w= b->width;
03450 int x,y;
03451
03452
03453
03454 IDWTELEM * line=0;
03455 IDWTELEM * prev;
03456
03457 if (start_y != 0)
03458 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03459
03460 for(y=start_y; y<end_y; y++){
03461 prev = line;
03462
03463 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03464 for(x=0; x<w; x++){
03465 if(x){
03466 if(use_median){
03467 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
03468 else line[x] += line[x - 1];
03469 }else{
03470 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
03471 else line[x] += line[x - 1];
03472 }
03473 }else{
03474 if(y) line[x] += prev[x];
03475 }
03476 }
03477 }
03478
03479
03480 }
03481
03482 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03483 const int w= b->width;
03484 const int h= b->height;
03485 int x,y;
03486
03487 for(y=0; y<h; y++){
03488 for(x=0; x<w; x++){
03489 int i= x + y*stride;
03490
03491 if(x){
03492 if(use_median){
03493 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03494 else src[i] += src[i - 1];
03495 }else{
03496 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03497 else src[i] += src[i - 1];
03498 }
03499 }else{
03500 if(y) src[i] += src[i - stride];
03501 }
03502 }
03503 }
03504 }
03505
03506 static void encode_qlogs(SnowContext *s){
03507 int plane_index, level, orientation;
03508
03509 for(plane_index=0; plane_index<2; plane_index++){
03510 for(level=0; level<s->spatial_decomposition_count; level++){
03511 for(orientation=level ? 1:0; orientation<4; orientation++){
03512 if(orientation==2) continue;
03513 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
03514 }
03515 }
03516 }
03517 }
03518
03519 static void encode_header(SnowContext *s){
03520 int plane_index, i;
03521 uint8_t kstate[32];
03522
03523 memset(kstate, MID_STATE, sizeof(kstate));
03524
03525 put_rac(&s->c, kstate, s->keyframe);
03526 if(s->keyframe || s->always_reset){
03527 reset_contexts(s);
03528 s->last_spatial_decomposition_type=
03529 s->last_qlog=
03530 s->last_qbias=
03531 s->last_mv_scale=
03532 s->last_block_max_depth= 0;
03533 for(plane_index=0; plane_index<2; plane_index++){
03534 Plane *p= &s->plane[plane_index];
03535 p->last_htaps=0;
03536 p->last_diag_mc=0;
03537 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
03538 }
03539 }
03540 if(s->keyframe){
03541 put_symbol(&s->c, s->header_state, s->version, 0);
03542 put_rac(&s->c, s->header_state, s->always_reset);
03543 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
03544 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
03545 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03546 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
03547 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
03548 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
03549 put_rac(&s->c, s->header_state, s->spatial_scalability);
03550
03551 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
03552
03553 encode_qlogs(s);
03554 }
03555
03556 if(!s->keyframe){
03557 int update_mc=0;
03558 for(plane_index=0; plane_index<2; plane_index++){
03559 Plane *p= &s->plane[plane_index];
03560 update_mc |= p->last_htaps != p->htaps;
03561 update_mc |= p->last_diag_mc != p->diag_mc;
03562 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03563 }
03564 put_rac(&s->c, s->header_state, update_mc);
03565 if(update_mc){
03566 for(plane_index=0; plane_index<2; plane_index++){
03567 Plane *p= &s->plane[plane_index];
03568 put_rac(&s->c, s->header_state, p->diag_mc);
03569 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
03570 for(i= p->htaps/2; i; i--)
03571 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
03572
03573 p->last_diag_mc= p->diag_mc;
03574 p->last_htaps= p->htaps;
03575 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03576 }
03577 }
03578 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
03579 put_rac(&s->c, s->header_state, 1);
03580 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03581 encode_qlogs(s);
03582 }else
03583 put_rac(&s->c, s->header_state, 0);
03584 }
03585
03586 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
03587 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
03588 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
03589 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
03590 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
03591
03592 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
03593 s->last_qlog = s->qlog;
03594 s->last_qbias = s->qbias;
03595 s->last_mv_scale = s->mv_scale;
03596 s->last_block_max_depth = s->block_max_depth;
03597 s->last_spatial_decomposition_count= s->spatial_decomposition_count;
03598 }
03599
03600 static void decode_qlogs(SnowContext *s){
03601 int plane_index, level, orientation;
03602
03603 for(plane_index=0; plane_index<3; plane_index++){
03604 for(level=0; level<s->spatial_decomposition_count; level++){
03605 for(orientation=level ? 1:0; orientation<4; orientation++){
03606 int q;
03607 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
03608 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
03609 else q= get_symbol(&s->c, s->header_state, 1);
03610 s->plane[plane_index].band[level][orientation].qlog= q;
03611 }
03612 }
03613 }
03614 }
03615
03616 static int decode_header(SnowContext *s){
03617 int plane_index;
03618 uint8_t kstate[32];
03619
03620 memset(kstate, MID_STATE, sizeof(kstate));
03621
03622 s->keyframe= get_rac(&s->c, kstate);
03623 if(s->keyframe || s->always_reset){
03624 reset_contexts(s);
03625 s->spatial_decomposition_type=
03626 s->qlog=
03627 s->qbias=
03628 s->mv_scale=
03629 s->block_max_depth= 0;
03630 }
03631 if(s->keyframe){
03632 s->version= get_symbol(&s->c, s->header_state, 0);
03633 if(s->version>0){
03634 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
03635 return -1;
03636 }
03637 s->always_reset= get_rac(&s->c, s->header_state);
03638 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
03639 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03640 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03641 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
03642 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
03643 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
03644 s->spatial_scalability= get_rac(&s->c, s->header_state);
03645
03646 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
03647
03648 decode_qlogs(s);
03649 }
03650
03651 if(!s->keyframe){
03652 if(get_rac(&s->c, s->header_state)){
03653 for(plane_index=0; plane_index<2; plane_index++){
03654 int htaps, i, sum=0;
03655 Plane *p= &s->plane[plane_index];
03656 p->diag_mc= get_rac(&s->c, s->header_state);
03657 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
03658 if((unsigned)htaps > HTAPS_MAX || htaps==0)
03659 return -1;
03660 p->htaps= htaps;
03661 for(i= htaps/2; i; i--){
03662 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
03663 sum += p->hcoeff[i];
03664 }
03665 p->hcoeff[0]= 32-sum;
03666 }
03667 s->plane[2].diag_mc= s->plane[1].diag_mc;
03668 s->plane[2].htaps = s->plane[1].htaps;
03669 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
03670 }
03671 if(get_rac(&s->c, s->header_state)){
03672 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03673 decode_qlogs(s);
03674 }
03675 }
03676
03677 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
03678 if(s->spatial_decomposition_type > 1){
03679 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
03680 return -1;
03681 }
03682
03683 s->