00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "dsputil.h"
00026 #include "common.h"
00027
/* Packed-word constants: each 64-bit value repeats one 16-bit word four times
 * so it can be used directly as the memory operand of the MMX word
 * instructions (paddw / pmullw) in the routines below. */
00028 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL; /* 4 x 4: first IDCT pass bias; rounding for the h and v2 filters */
00029 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL; /* 4 x 5: multiplier of the h and v2 filters */
00030 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL; /* 4 x 7: not referenced in the visible part of this file */
00031 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_42) = 0x002A002A002A002AULL; /* 4 x 42: multiplier of the v1/v3 filters */
00032 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_64) = 0x0040004000400040ULL; /* 4 x 64: second IDCT pass bias; rounding for the v1/v3 filters */
00033 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_96) = 0x0060006000600060ULL; /* 4 x 96: multiplier of the v1/v3 filters */
00034
00035
00036
00037
00038
00039
00040
/* Sum/difference butterfly on packed words (AT&T operand order: source, dest).
 * On exit: a = a + b, b = b - a (using the values of a and b on entry).
 * The difference is formed as 2*b - (a + b), so no scratch register is needed. */
00041 #define SUMSUB_BA( a, b ) \
00042 "paddw "#b", "#a" \n\t" /* a = a + b */\
00043 "paddw "#b", "#b" \n\t" /* b = 2*b */\
00044 "psubw "#a", "#b" \n\t" /* b = 2*b - (a + b) = b - a */
00045
/* Interleave registers a and b at element size n (appended to "punpckl" /
 * "punpckh", e.g. wd or dq).
 * On exit: a = low-half interleave of (a, b), t = high-half interleave of
 * (a, b); b is left unchanged and the previous content of t is lost. */
00046 #define SBUTTERFLY(a,b,t,n)\
00047 "movq " #a ", " #t " \n\t" /* t = copy of a */\
00048 "punpckl" #n " " #b ", " #a " \n\t" /* a = interleaved low halves */\
00049 "punpckh" #n " " #b ", " #t " \n\t" /* t = interleaved high halves */
00050
/* Transpose a 4x4 matrix of 16-bit words whose rows are held in a, b, c, d,
 * using t as scratch. All five registers are overwritten.
 * The transposed rows 0..3 end up in a, d, t, c (in that order), which is why
 * the callers store the registers in that sequence. */
00051 #define TRANSPOSE4(a,b,c,d,t)\
00052 SBUTTERFLY(a,b,t,wd) /* a = a0 b0 a1 b1, t = a2 b2 a3 b3 */\
00053 SBUTTERFLY(c,d,b,wd) /* c = c0 d0 c1 d1, b = c2 d2 c3 d3 */\
00054 SBUTTERFLY(a,c,d,dq) /* a = a0 b0 c0 d0, d = a1 b1 c1 d1 */\
00055 SBUTTERFLY(t,b,c,dq) /* t = a2 b2 c2 d2, c = a3 b3 c3 d3 */
00056
/**
 * One 1-D pass of the 8-point inverse transform over four columns at once.
 *
 * Reads eight quadwords (four int16_t each) at byte offsets 0, 16, ..., 112
 * from block; "srcN" in the comments below is the quadword at offset 16*N.
 * Nothing is written to memory: the eight result rows are left in the MMX
 * registers for the asm statement that follows in the caller:
 *   mm7 = dst0, mm5 = dst1, mm3 = dst2, mm1 = dst3,
 *   mm0 = dst4, mm2 = dst5, mm4 = dst6, mm6 = dst7
 * The results are not shifted down here; the caller applies the final
 * arithmetic right shift.
 *
 * @param block pointer to the first of the four columns to transform
 * @param bias  packed-word rounding constant added to the two even-part
 *              terms 8*(src0 + src4) and 8*(src0 - src4)
 */
00057 static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
00058 {
00059 asm volatile(
00060 "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
00061 "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
00062 "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
00063 "movq 48(%0), %%mm7 \n\t" /* mm7 = src3 */
00064 "movq %%mm4, %%mm0 \n\t"
00065 "movq %%mm5, %%mm3 \n\t"
00066 "movq %%mm2, %%mm6 \n\t"
00067 "movq %%mm7, %%mm1 \n\t"
00068
00069 "paddw %%mm4, %%mm4 \n\t" /* mm4 = 2*src7 */
00070 "paddw %%mm3, %%mm3 \n\t" /* mm3 = 2*src1 */
00071 "paddw %%mm6, %%mm6 \n\t" /* mm6 = 2*src5 */
00072 "paddw %%mm1, %%mm1 \n\t" /* mm1 = 2*src3 */
00073 "paddw %%mm4, %%mm0 \n\t" /* mm0 = 3*src7 */
00074 "paddw %%mm3, %%mm5 \n\t" /* mm5 = 3*src1 */
00075 "paddw %%mm6, %%mm2 \n\t" /* mm2 = 3*src5 */
00076 "paddw %%mm1, %%mm7 \n\t" /* mm7 = 3*src3 */
00077 "psubw %%mm4, %%mm5 \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
00078 "paddw %%mm6, %%mm7 \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
00079 "psubw %%mm2, %%mm1 \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
00080 "paddw %%mm0, %%mm3 \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
00081
00082 "movq %%mm5, %%mm4 \n\t" /* mm4 = a0 */
00083 "movq %%mm7, %%mm6 \n\t" /* mm6 = a1 */
00084 "movq %%mm3, %%mm0 \n\t" /* mm0 = a3 */
00085 "movq %%mm1, %%mm2 \n\t" /* mm2 = a2 */
00086 SUMSUB_BA( %%mm7, %%mm5 ) /* mm7 = a0 + a1, mm5 = a0 - a1 */
00087 "paddw %%mm3, %%mm7 \n\t" /* mm7 = a0 + a1 + a3 */
00088 "paddw %%mm1, %%mm5 \n\t" /* mm5 = a0 - a1 + a2 */
00089 "paddw %%mm7, %%mm7 \n\t"
00090 "paddw %%mm5, %%mm5 \n\t"
00091 "paddw %%mm6, %%mm7 \n\t" /* mm7 = 2*(a0 + a1 + a3) + a1 = b4 */
00092 "paddw %%mm4, %%mm5 \n\t" /* mm5 = 2*(a0 - a1 + a2) + a0 = b5 */
00093
00094 SUMSUB_BA( %%mm1, %%mm3 ) /* mm1 = a3 + a2, mm3 = a3 - a2 */
00095 "psubw %%mm1, %%mm4 \n\t" /* mm4 = a0 - a2 - a3 */
00096 "movq %%mm4, %%mm1 \n\t" /* mm1 = a0 - a2 - a3 */
00097 "psubw %%mm6, %%mm3 \n\t" /* mm3 = a3 - a2 - a1 */
00098 "paddw %%mm1, %%mm1 \n\t"
00099 "paddw %%mm3, %%mm3 \n\t"
00100 "psubw %%mm2, %%mm1 \n\t" /* mm1 = 2*(a0 - a2 - a3) - a2 = b7 */
00101 "paddw %%mm0, %%mm3 \n\t" /* mm3 = 2*(a3 - a2 - a1) + a3 = b6 */
00102
00103 "movq 32(%0), %%mm2 \n\t" /* mm2 = src2 */
00104 "movq 96(%0), %%mm6 \n\t" /* mm6 = src6 */
00105 "movq %%mm2, %%mm4 \n\t"
00106 "movq %%mm6, %%mm0 \n\t"
00107 "psllw $2, %%mm4 \n\t" /* mm4 = 4*src2 */
00108 "psllw $2, %%mm6 \n\t" /* mm6 = 4*src6 */
00109 "paddw %%mm4, %%mm2 \n\t" /* mm2 = 5*src2 */
00110 "paddw %%mm6, %%mm0 \n\t" /* mm0 = 5*src6 */
00111 "paddw %%mm2, %%mm2 \n\t" /* mm2 = 10*src2 */
00112 "paddw %%mm0, %%mm0 \n\t" /* mm0 = 10*src6 */
00113 "psubw %%mm0, %%mm4 \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
00114 "paddw %%mm2, %%mm6 \n\t" /* mm6 = 4*src6 + 10*src2 = a6 */
00115
00116 "movq (%0), %%mm2 \n\t" /* mm2 = src0 */
00117 "movq 64(%0), %%mm0 \n\t" /* mm0 = src4 */
00118 SUMSUB_BA( %%mm0, %%mm2 ) /* mm0 = src0 + src4, mm2 = src0 - src4 */
00119 "psllw $3, %%mm0 \n\t"
00120 "psllw $3, %%mm2 \n\t"
00121 "paddw %1, %%mm0 \n\t" /* mm0 = 8*(src0 + src4) + bias = a4 */
00122 "paddw %1, %%mm2 \n\t" /* mm2 = 8*(src0 - src4) + bias = a5 */
00123
00124 SUMSUB_BA( %%mm6, %%mm0 ) /* mm6 = a4 + a6, mm0 = a4 - a6 */
00125 SUMSUB_BA( %%mm4, %%mm2 ) /* mm4 = a5 + a7, mm2 = a5 - a7 */
00126 SUMSUB_BA( %%mm7, %%mm6 ) /* mm7 = dst0, mm6 = dst7 */
00127 SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1, mm4 = dst6 */
00128 SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2, mm2 = dst5 */
00129 SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3, mm0 = dst4 */
/* The asm reads *block through a plain register operand, so the "memory"
 * clobber is required to make the compiler complete earlier stores to the
 * block before this statement. The MMX registers are deliberately not listed
 * as clobbers: they carry the results to the caller's next asm statement. */
00130 :: "r"(block), "m"(bias) : "memory"
00131 );
00132 }
00133
/**
 * 8x8 inverse transform of block, result added to dst, block cleared afterwards.
 *
 * Steps, as performed below:
 *  1. First 1-D pass over block, four columns per iteration, with bias
 *     ff_pw_4 and an arithmetic right shift by 3; the results are transposed
 *     in 4x4 tiles and written to the local buffer b2.
 *  2. Second 1-D pass over b2 (in place), with bias ff_pw_64 and an
 *     arithmetic right shift by 7.
 *  3. add_pixels_clamped_mmx(b2, dst, stride) - defined outside this file;
 *     presumably adds b2 to the pixels at dst with clamping (confirm against
 *     its definition).
 *  4. All 64 coefficients (128 bytes) of block are set to zero.
 *
 * Each call to cavs_idct8_1d() leaves its results in mm0-mm7, and the asm
 * statement right after it consumes those registers, so no other MMX code may
 * be placed between the two.
 *
 * @param dst    destination passed through to add_pixels_clamped_mmx()
 * @param block  64 int16_t coefficients; zeroed on return
 * @param stride passed through to add_pixels_clamped_mmx()
 */
00134 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
00135 {
00136 int i;
00137 DECLARE_ALIGNED_8(int16_t, b2[64]);
00138
/* pass 1: columns 0-3 (i = 0) and 4-7 (i = 1) of block */
00139 for(i=0; i<2; i++){
00140 DECLARE_ALIGNED_8(uint64_t, tmp); /* spill slot for mm7 during the first transpose */
00141
00142 cavs_idct8_1d(block+4*i, ff_pw_4);
00143
00144 asm volatile(
00145 "psraw $3, %%mm7 \n\t"
00146 "psraw $3, %%mm6 \n\t"
00147 "psraw $3, %%mm5 \n\t"
00148 "psraw $3, %%mm4 \n\t"
00149 "psraw $3, %%mm3 \n\t"
00150 "psraw $3, %%mm2 \n\t"
00151 "psraw $3, %%mm1 \n\t"
00152 "psraw $3, %%mm0 \n\t"
00153 "movq %%mm7, %0 \n\t" /* save dst0: mm7 is the scratch register of the transpose below */
00154 TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 ) /* rows dst4..dst7; result rows in mm0, mm6, mm7, mm4 */
00155 "movq %%mm0, 8(%1) \n\t"
00156 "movq %%mm6, 24(%1) \n\t"
00157 "movq %%mm7, 40(%1) \n\t"
00158 "movq %%mm4, 56(%1) \n\t"
00159 "movq %0, %%mm7 \n\t" /* reload dst0 */
00160 TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 ) /* rows dst0..dst3; result rows in mm7, mm1, mm0, mm3 */
00161 "movq %%mm7, (%1) \n\t"
00162 "movq %%mm1, 16(%1) \n\t"
00163 "movq %%mm0, 32(%1) \n\t"
00164 "movq %%mm3, 48(%1) \n\t"
00165 : "=m"(tmp)
00166 : "r"(b2+32*i)
00167 : "memory"
00168 );
00169 }
00170
/* pass 2: columns 0-3 and 4-7 of the transposed intermediate, in place */
00171 for(i=0; i<2; i++){
00172 cavs_idct8_1d(b2+4*i, ff_pw_64);
00173
00174 asm volatile(
00175 "psraw $7, %%mm7 \n\t"
00176 "psraw $7, %%mm6 \n\t"
00177 "psraw $7, %%mm5 \n\t"
00178 "psraw $7, %%mm4 \n\t"
00179 "psraw $7, %%mm3 \n\t"
00180 "psraw $7, %%mm2 \n\t"
00181 "psraw $7, %%mm1 \n\t"
00182 "psraw $7, %%mm0 \n\t"
00183 "movq %%mm7, (%0) \n\t" /* dst0 */
00184 "movq %%mm5, 16(%0) \n\t" /* dst1 */
00185 "movq %%mm3, 32(%0) \n\t" /* dst2 */
00186 "movq %%mm1, 48(%0) \n\t" /* dst3 */
00187 "movq %%mm0, 64(%0) \n\t" /* dst4 */
00188 "movq %%mm2, 80(%0) \n\t" /* dst5 */
00189 "movq %%mm4, 96(%0) \n\t" /* dst6 */
00190 "movq %%mm6, 112(%0) \n\t" /* dst7 */
00191 :: "r"(b2+4*i)
00192 : "memory"
00193 );
00194 }
00195
00196 add_pixels_clamped_mmx(b2, dst, stride);
00197
00198
/* clear block: 16 quadword stores = 128 bytes = 64 int16_t */
00199 asm volatile(
00200 "pxor %%mm7, %%mm7 \n\t"
00201 "movq %%mm7, (%0) \n\t"
00202 "movq %%mm7, 8(%0) \n\t"
00203 "movq %%mm7, 16(%0) \n\t"
00204 "movq %%mm7, 24(%0) \n\t"
00205 "movq %%mm7, 32(%0) \n\t"
00206 "movq %%mm7, 40(%0) \n\t"
00207 "movq %%mm7, 48(%0) \n\t"
00208 "movq %%mm7, 56(%0) \n\t"
00209 "movq %%mm7, 64(%0) \n\t"
00210 "movq %%mm7, 72(%0) \n\t"
00211 "movq %%mm7, 80(%0) \n\t"
00212 "movq %%mm7, 88(%0) \n\t"
00213 "movq %%mm7, 96(%0) \n\t"
00214 "movq %%mm7, 104(%0) \n\t"
00215 "movq %%mm7, 112(%0) \n\t"
00216 "movq %%mm7, 120(%0) \n\t"
/* These stores go through a plain register operand, so the compiler must be
 * told via the "memory" clobber that *block is modified (as the two storing
 * asm statements above already do); otherwise a caller this function is
 * inlined into may keep using stale coefficient values. */
00217 :: "r" (block) : "memory"
00218 );
00219 }
00220
00221
00222
00223
00224
00225
00226
00227
/* One output row of the 5-tap vertical filter used by the *_v1_* functions.
 * A..E hold five consecutive source rows as unpacked words, F receives the
 * next source row for later steps. With the operands supplied by
 * QPEL_CAVSVNUM (%0 = src, %1 = dst, %2 = srcStride, %3 = dstStride,
 * %4 = rounding constant, %5 = MUL1, %6 = MUL2) the row written is
 *   (-A - 2*B + MUL1*C + MUL2*D - 7*E + %4) >> 7, packed with unsigned
 *   saturation.
 * B and E are scaled in place and then restored by the matching right shift.
 * mm6 and mm7 are scratch (mm7 is zero on exit); A is handed to OP as its
 * temporary register and may be overwritten by it. src and dst each advance
 * by one row. */
00228 #define QPEL_CAVSV1(A,B,C,D,E,F,OP) \
00229 "movd (%0), "#F" \n\t" /* F = next source row (4 bytes) */\
00230 "movq "#C", %%mm6 \n\t"\
00231 "pmullw %5, %%mm6 \n\t" /* mm6 = MUL1*C */\
00232 "movq "#D", %%mm7 \n\t"\
00233 "pmullw %6, %%mm7 \n\t" /* mm7 = MUL2*D */\
00234 "psllw $3, "#E" \n\t" /* E = 8*E (temporarily) */\
00235 "psubw "#E", %%mm6 \n\t" /* mm6 -= 8*E */\
00236 "psraw $3, "#E" \n\t" /* restore E */\
00237 "paddw %%mm7, %%mm6 \n\t"\
00238 "paddw "#E", %%mm6 \n\t" /* mm6 = MUL1*C + MUL2*D - 7*E */\
00239 "paddw "#B", "#B" \n\t" /* B = 2*B (temporarily) */\
00240 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 for the unpack below */\
00241 "add %2, %0 \n\t" /* src += srcStride */\
00242 "punpcklbw %%mm7, "#F" \n\t" /* F: bytes -> words */\
00243 "psubw "#B", %%mm6 \n\t" /* mm6 -= 2*B */\
00244 "psraw $1, "#B" \n\t" /* restore B */\
00245 "psubw "#A", %%mm6 \n\t" /* mm6 -= A */\
00246 "paddw %4, %%mm6 \n\t" /* add rounding constant */\
00247 "psraw $7, %%mm6 \n\t"\
00248 "packuswb %%mm6, %%mm6 \n\t"\
00249 OP(%%mm6, (%1), A, d) \
00250 "add %3, %1 \n\t" /* dst += dstStride */
00251
00252
/* One output row of the 4-tap vertical filter used by the *_v2_* functions.
 * Same operand layout as QPEL_CAVSV1; %6 is not used. The row written is
 *   (-B + %5*(C + D) - E + %4) >> 3, packed with unsigned saturation.
 * A does not take part in the filter; it is only handed to OP as its
 * temporary register. F receives the next source row.
 * NOTE: unlike QPEL_CAVSV1/QPEL_CAVSV3 this macro never clears mm7; it relies
 * on mm7 still being zero from the pxor at the start of QPEL_CAVSVNUM (this
 * macro itself does not write mm7). */
00253 #define QPEL_CAVSV2(A,B,C,D,E,F,OP) \
00254 "movd (%0), "#F" \n\t" /* F = next source row (4 bytes) */\
00255 "movq "#C", %%mm6 \n\t"\
00256 "paddw "#D", %%mm6 \n\t" /* mm6 = C + D */\
00257 "pmullw %5, %%mm6 \n\t" /* mm6 = %5*(C + D) */\
00258 "add %2, %0 \n\t" /* src += srcStride */\
00259 "punpcklbw %%mm7, "#F" \n\t" /* F: bytes -> words (mm7 must be 0) */\
00260 "psubw "#B", %%mm6 \n\t"\
00261 "psubw "#E", %%mm6 \n\t"\
00262 "paddw %4, %%mm6 \n\t" /* add rounding constant */\
00263 "psraw $3, %%mm6 \n\t"\
00264 "packuswb %%mm6, %%mm6 \n\t"\
00265 OP(%%mm6, (%1), A, d) \
00266 "add %3, %1 \n\t" /* dst += dstStride */
00267
00268
/* One output row of the 5-tap vertical filter used by the *_v3_* functions;
 * the mirror image of QPEL_CAVSV1 (taps run over B..F instead of A..E).
 * Same operand layout as QPEL_CAVSV1. The row written is
 *   (-7*B + MUL2*C + MUL1*D - 2*E - F + %4) >> 7, packed with unsigned
 *   saturation, where F is the row loaded at the top of this macro.
 * B and E are scaled in place and then restored by the matching right shift.
 * A does not take part in the filter; it is only handed to OP as its
 * temporary register. mm6 and mm7 are scratch (mm7 is zero on exit). */
00269 #define QPEL_CAVSV3(A,B,C,D,E,F,OP) \
00270 "movd (%0), "#F" \n\t" /* F = next source row (4 bytes) */\
00271 "movq "#C", %%mm6 \n\t"\
00272 "pmullw %6, %%mm6 \n\t" /* mm6 = MUL2*C */\
00273 "movq "#D", %%mm7 \n\t"\
00274 "pmullw %5, %%mm7 \n\t" /* mm7 = MUL1*D */\
00275 "psllw $3, "#B" \n\t" /* B = 8*B (temporarily) */\
00276 "psubw "#B", %%mm6 \n\t" /* mm6 -= 8*B */\
00277 "psraw $3, "#B" \n\t" /* restore B */\
00278 "paddw %%mm7, %%mm6 \n\t"\
00279 "paddw "#B", %%mm6 \n\t" /* mm6 = MUL2*C + MUL1*D - 7*B */\
00280 "paddw "#E", "#E" \n\t" /* E = 2*E (temporarily) */\
00281 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 for the unpack below */\
00282 "add %2, %0 \n\t" /* src += srcStride */\
00283 "punpcklbw %%mm7, "#F" \n\t" /* F: bytes -> words */\
00284 "psubw "#E", %%mm6 \n\t" /* mm6 -= 2*E */\
00285 "psraw $1, "#E" \n\t" /* restore E */\
00286 "psubw "#F", %%mm6 \n\t" /* mm6 -= F */\
00287 "paddw %4, %%mm6 \n\t" /* add rounding constant */\
00288 "psraw $7, %%mm6 \n\t"\
00289 "packuswb %%mm6, %%mm6 \n\t"\
00290 OP(%%mm6, (%1), A, d) \
00291 "add %3, %1 \n\t" /* dst += dstStride */
00292
00293
/* Function body shared by the vertical filters. Expects src, dst, srcStride,
 * dstStride and h to be in scope (the parameters of the generated function).
 *   VOP  - per-row filter macro (QPEL_CAVSV1/2/3)
 *   OP   - store macro (PUT_OP or one of the AVG_*_OP macros)
 *   ADD  - rounding constant      (asm operand %4)
 *   MUL1 - first multiplier       (asm operand %5)
 *   MUL2 - second multiplier      (asm operand %6)
 * The 8-pixel-wide block is processed as two 4-pixel-wide columns (w = 2,
 * movd loads). For each column, five source rows starting two rows above src
 * are preloaded into mm0-mm4; every VOP then loads one more row and emits one
 * output row, with the six registers mm0-mm5 rotating through the A..F roles.
 * Eight rows are always produced; eight more when h == 16. The second asm
 * statement continues with the register contents left by the first one
 * (mm0-mm5, and a zero mm7 for QPEL_CAVSV2), so nothing that touches MMX
 * state may be placed between them.
 * After a column, src has advanced h+5 rows and dst h rows; both are rewound
 * and moved 4 pixels to the right. */
00294 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
00295 int w= 2;\
00296 src -= 2*srcStride;\
00297 \
00298 while(w--){\
00299 asm volatile(\
00300 "pxor %%mm7, %%mm7 \n\t"\
00301 "movd (%0), %%mm0 \n\t"\
00302 "add %2, %0 \n\t"\
00303 "movd (%0), %%mm1 \n\t"\
00304 "add %2, %0 \n\t"\
00305 "movd (%0), %%mm2 \n\t"\
00306 "add %2, %0 \n\t"\
00307 "movd (%0), %%mm3 \n\t"\
00308 "add %2, %0 \n\t"\
00309 "movd (%0), %%mm4 \n\t"\
00310 "add %2, %0 \n\t"\
00311 "punpcklbw %%mm7, %%mm0 \n\t"\
00312 "punpcklbw %%mm7, %%mm1 \n\t"\
00313 "punpcklbw %%mm7, %%mm2 \n\t"\
00314 "punpcklbw %%mm7, %%mm3 \n\t"\
00315 "punpcklbw %%mm7, %%mm4 \n\t"\
00316 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
00317 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
00318 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
00319 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
00320 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
00321 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
00322 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
00323 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
00324 \
00325 : "+a"(src), "+c"(dst)\
00326 : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
00327 : "memory"\
00328 );\
00329 if(h==16){\
00330 asm volatile(\
00331 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
00332 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
00333 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
00334 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
00335 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
00336 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
00337 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
00338 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
00339 \
00340 : "+a"(src), "+c"(dst)\
00341 : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
00342 : "memory"\
00343 );\
00344 }\
00345 src += 4-(h+5)*srcStride;\
00346 dst += 4-h*dstStride;\
00347 }
00348
/* Generates the interpolation functions for one (OPNAME, OP, MMX) combination:
 *   OPNAME - name prefix (put_ / avg_)
 *   OP     - store macro (PUT_OP, AVG_3DNOW_OP or AVG_MMX2_OP)
 *   MMX    - name suffix (3dnow / mmx2)
 * Functions produced:
 *   <OPNAME>cavs_qpel8_h_<MMX>          - horizontal filter, 8 pixels x 8 rows:
 *        (-src[-1] + 5*src[0] + 5*src[1] - src[2] + 4) >> 3, packed with
 *        unsigned saturation
 *   <OPNAME>cavs_qpel8or16_v{1,2,3}_<MMX> - vertical filters, 8 pixels wide,
 *        built from QPEL_CAVSVNUM with the matching per-row macro
 *   <OPNAME>cavs_qpel{8,16}_v{1,2,3}_<MMX> - fixed-size wrappers; the 16-wide
 *        ones run the 8-wide routine on the left and right halves with h = 16
 *   <OPNAME>cavs_qpel16_h_<MMX>         - four 8x8 calls covering the 16x16
 *        block */
00349 #define QPEL_CAVS(OPNAME, OP, MMX)\
00350 static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00351 int h=8;\
00352 asm volatile(\
00353 "pxor %%mm7, %%mm7 \n\t"\
00354 "movq %5, %%mm6 \n\t" /* mm6 = ff_pw_5 */\
00355 "1: \n\t"\
00356 "movq (%0), %%mm0 \n\t" /* src[0..7] */\
00357 "movq 1(%0), %%mm2 \n\t" /* src[1..8] */\
00358 "movq %%mm0, %%mm1 \n\t"\
00359 "movq %%mm2, %%mm3 \n\t"\
00360 "punpcklbw %%mm7, %%mm0 \n\t"\
00361 "punpckhbw %%mm7, %%mm1 \n\t"\
00362 "punpcklbw %%mm7, %%mm2 \n\t"\
00363 "punpckhbw %%mm7, %%mm3 \n\t"\
00364 "paddw %%mm2, %%mm0 \n\t"\
00365 "paddw %%mm3, %%mm1 \n\t"\
00366 "pmullw %%mm6, %%mm0 \n\t" /* low half: 5*(src[0] + src[1]) */\
00367 "pmullw %%mm6, %%mm1 \n\t" /* high half: 5*(src[0] + src[1]) */\
00368 "movq -1(%0), %%mm2 \n\t" /* src[-1..6] */\
00369 "movq 2(%0), %%mm4 \n\t" /* src[2..9] */\
00370 "movq %%mm2, %%mm3 \n\t"\
00371 "movq %%mm4, %%mm5 \n\t"\
00372 "punpcklbw %%mm7, %%mm2 \n\t"\
00373 "punpckhbw %%mm7, %%mm3 \n\t"\
00374 "punpcklbw %%mm7, %%mm4 \n\t"\
00375 "punpckhbw %%mm7, %%mm5 \n\t"\
00376 "paddw %%mm4, %%mm2 \n\t" /* low half: src[-1] + src[2] */\
00377 "paddw %%mm3, %%mm5 \n\t" /* high half: src[-1] + src[2] */\
00378 "psubw %%mm2, %%mm0 \n\t"\
00379 "psubw %%mm5, %%mm1 \n\t"\
00380 "movq %6, %%mm5 \n\t" /* mm5 = ff_pw_4 (rounding) */\
00381 "paddw %%mm5, %%mm0 \n\t"\
00382 "paddw %%mm5, %%mm1 \n\t"\
00383 "psraw $3, %%mm0 \n\t"\
00384 "psraw $3, %%mm1 \n\t"\
00385 "packuswb %%mm1, %%mm0 \n\t"\
00386 OP(%%mm0, (%1),%%mm5, q) \
00387 "add %3, %0 \n\t" /* src += srcStride */\
00388 "add %4, %1 \n\t" /* dst += dstStride */\
00389 "decl %2 \n\t" /* --h, loop until zero */\
00390 " jnz 1b \n\t"\
00391 : "+a"(src), "+c"(dst), "+m"(h)\
00392 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
00393 : "memory"\
00394 );\
00395 }\
00396 \
00397 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00398 QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
00399 }\
00400 \
00401 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00402 QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
00403 }\
00404 \
00405 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00406 QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
00407 }\
00408 \
00409 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00410 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
00411 }\
00412 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00413 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
00414 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
00415 }\
00416 \
00417 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00418 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
00419 }\
00420 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00421 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
00422 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
00423 }\
00424 \
00425 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00426 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
00427 }\
00428 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00429 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
00430 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
00431 }\
00432 \
00433 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00434 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
00435 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
00436 src += 8*srcStride;\
00437 dst += 8*dstStride;\
00438 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
00439 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
00440 }\
00441
/* Generates the three-argument entry points ff_<OPNAME>cavs_qpel<SIZE>_mcXY_<MMX>
 * that are stored in the DSPContext tables. Each one forwards to the matching
 * function generated by QPEL_CAVS, passing stride as both the destination and
 * the source stride:
 *   mc20 -> _h_, mc01 -> _v1_, mc02 -> _v2_, mc03 -> _v3_
 * No mc00 variant is generated here. */
00442 #define CAVS_MC(OPNAME, SIZE, MMX) \
00443 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
00444 OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
00445 }\
00446 \
00447 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
00448 OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
00449 }\
00450 \
00451 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
00452 OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
00453 }\
00454 \
00455 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
00456 OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
00457 }\
00458
/* Store macros plugged into the filters as OP(a, b, temp, size):
 *   a    - register holding the packed result
 *   b    - destination memory operand
 *   temp - scratch register (unused by PUT_OP)
 *   size - mov suffix, d (4 bytes) or q (8 bytes) */
/* PUT_OP: plain store of a to b. */
00459 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
/* AVG_3DNOW_OP: load b into temp, combine it into a with pavgusb (3DNow!
 * byte average), then store a back to b. */
00460 #define AVG_3DNOW_OP(a,b,temp, size) \
00461 "mov" #size " " #b ", " #temp " \n\t"\
00462 "pavgusb " #temp ", " #a " \n\t"\
00463 "mov" #size " " #a ", " #b " \n\t"
/* AVG_MMX2_OP: same sequence as AVG_3DNOW_OP but using pavgb. */
00464 #define AVG_MMX2_OP(a,b,temp, size) \
00465 "mov" #size " " #b ", " #temp " \n\t"\
00466 "pavgb " #temp ", " #a " \n\t"\
00467 "mov" #size " " #a ", " #b " \n\t"
00468
/* Instantiate the filter functions: put/avg variants for the 3dnow and mmx2
 * name suffixes. The put variants use the same store macro for both suffixes;
 * only the avg variants differ (pavgusb vs. pavgb). */
00469 QPEL_CAVS(put_, PUT_OP, 3dnow)
00470 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
00471 QPEL_CAVS(put_, PUT_OP, mmx2)
00472 QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
00473
/* Instantiate the ff_*_mcXY_* entry points for every combination of
 * put/avg, block size 8/16 and 3dnow/mmx2. */
00474 CAVS_MC(put_, 8, 3dnow)
00475 CAVS_MC(put_, 16,3dnow)
00476 CAVS_MC(avg_, 8, 3dnow)
00477 CAVS_MC(avg_, 16,3dnow)
00478 CAVS_MC(put_, 8, mmx2)
00479 CAVS_MC(put_, 16,mmx2)
00480 CAVS_MC(avg_, 8, mmx2)
00481 CAVS_MC(avg_, 16,mmx2)
00482
/* mc00 entry points used by both init functions below. They are only declared
 * here; no definition is visible in this file (CAVS_MC does not generate an
 * mc00 variant), so they must be provided by another translation unit. */
00483 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00484 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00485 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00486 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
00487
/**
 * Install the mmx2 variants of the CAVS routines into a DSPContext.
 *
 * For put_cavs_qpel and avg_cavs_qpel, table row 0 receives the 16x16
 * functions and row 1 the 8x8 functions. Only entries 0, 2, 4, 8 and 12 of
 * each row are written (mc00, mc20, mc01, mc02, mc03; the index equals
 * x + 4*y for the mcXY suffix); all other entries are left untouched.
 * cavs_idct8_add is set to cavs_idct8_add_mmx.
 *
 * @param c     context whose function pointers are filled in
 * @param avctx not used by this function
 */
00488 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
00489 #define dspfunc(PFX, IDX, NUM) \
00490 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
00491 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
00492 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
00493 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
00494 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
00495
00496 dspfunc(put_cavs_qpel, 0, 16);
00497 dspfunc(put_cavs_qpel, 1, 8);
00498 dspfunc(avg_cavs_qpel, 0, 16);
00499 dspfunc(avg_cavs_qpel, 1, 8);
00500 #undef dspfunc
00501 c->cavs_idct8_add = cavs_idct8_add_mmx;
00502 }
00503
/**
 * Install the 3dnow variants of the CAVS routines into a DSPContext.
 *
 * Same table layout as ff_cavsdsp_init_mmx2(): row 0 = 16x16, row 1 = 8x8,
 * entries 0, 2, 4, 8 and 12 are written. The filtered positions (mc20, mc01,
 * mc02, mc03) use the _3dnow functions, while entry 0 (mc00) uses the _mmx2
 * function, since that is the only mc00 variant declared in this file.
 * NOTE(review): confirm that the mc00_mmx2 routines are safe to run on every
 * CPU for which this init function is called.
 * cavs_idct8_add is set to cavs_idct8_add_mmx.
 *
 * @param c     context whose function pointers are filled in
 * @param avctx not used by this function
 */
00504 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
00505 #define dspfunc(PFX, IDX, NUM) \
00506 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
00507 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
00508 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
00509 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
00510 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
00511
00512 dspfunc(put_cavs_qpel, 0, 16);
00513 dspfunc(put_cavs_qpel, 1, 8);
00514 dspfunc(avg_cavs_qpel, 0, 16);
00515 dspfunc(avg_cavs_qpel, 1, 8);
00516 #undef dspfunc
00517 c->cavs_idct8_add = cavs_idct8_add_mmx;
00518 }