00001 #include <mythtv/mythconfig.h>
00002
00003 #if defined(MMX) && !defined(ARCH_X86_64)
00004
00005
00006
/*
 * When defined, zoom_filter_xmmx() clears BOTH coordinates of a sample
 * position as soon as either axis is out of range (see the
 * "#ifdef STRICT_COMPAT" section of that function).  Without it each axis
 * is cleared independently.
 */
#define STRICT_COMPAT

/*
 * NOTE(review): the three BUFF* constants are not referenced by name in
 * this file.  The interpolation code shifts by a literal 16, which
 * presumably corresponds to BUFFPOINTNB -- confirm before changing either.
 */
#define BUFFPOINTNB 16
#define BUFFPOINTMASK 0xffff
#define BUFFINCR 0xff

/*
 * NOTE(review): not referenced by name in this file.  The value matches
 * both dimensions of the precalCoef[16][16] table taken by
 * zoom_filter_xmmx(), i.e. presumably the number of sub-pixel steps per
 * axis -- confirm against the code that builds the table.
 */
#define sqrtperte 16

/*
 * NOTE(review): not referenced by name in this file; the inline assembly
 * masks the fractional coordinate bits with a literal 15, which has the
 * same value.
 */
#define PERTEMASK 0xf

/*
 * Number of fractional bits in a coordinate: coordinates are shifted right
 * by PERTEDEC to obtain whole-pixel indices, and the (width-1)/(height-1)
 * limits are shifted left by PERTEDEC before being compared with them.
 */
#define PERTEDEC 4
00019
00020
00021
00022 #include "mmx.h"
00023
/*
 * Report whether zoom_filter_xmmx() may be used on the running CPU.
 *
 * Returns 1 when bit 3 (mask 0x8) of the value returned by mm_support()
 * is set, and 0 otherwise.
 *
 * NOTE(review): bit 3 is presumably the "extended MMX" capability flag
 * reported by mm_support() -- confirm against mmx.h.
 */
int zoom_filter_xmmx_supported (void) {
    return (mm_support() & 0x8) ? 1 : 0;
}
00027
00028 void zoom_filter_xmmx (int prevX, int prevY,
00029 unsigned int *expix1, unsigned int *expix2,
00030 int *lbruS, int *lbruD, int buffratio,
00031 int precalCoef[16][16])
00032 {
00033 int bufsize = prevX * prevY;
00034 volatile int loop;
00035
00036 mmx_t *brutS = (mmx_t*)lbruS;
00037 mmx_t *brutD = (mmx_t*)lbruD;
00038
00039 volatile mmx_t prevXY;
00040 volatile mmx_t ratiox;
00041
00042
00043 expix1[0]=expix1[prevX-1]=expix1[prevX*prevY-1]=expix1[prevX*prevY-prevX]=0;
00044
00045 prevXY.ud[0] = (prevX-1)<<PERTEDEC;
00046 prevXY.ud[1] = (prevY-1)<<PERTEDEC;
00047
00048 ratiox.d[0] = buffratio;
00049 ratiox.d[1] = buffratio;
00050 movq_m2r (ratiox, mm6);
00051 pslld_i2r (16,mm6);
00052
00053 pxor_r2r (mm7,mm7);
00054
00055 loop=0;
00056
00057
00058
00059
00060 while (loop < bufsize)
00061 {
00062
00063
00064
00065
00066
00067
00068 __asm__ __volatile__ (
00069 "movq %0,%%mm0\n"
00070 "movq %1,%%mm1\n"
00071 : :"m"(brutS[loop]),"m"(brutD[loop])
00072 );
00073
00074 psubd_r2r (mm0,mm1);
00075 movq_r2r (mm1, mm2);
00076
00077 pslld_i2r (16,mm1);
00078 mmx_r2r (pmulhuw, mm6, mm1);
00079 pmullw_r2r (mm6, mm2);
00080
00081 paddd_r2r (mm2, mm1);
00082 pslld_i2r (16,mm0);
00083
00084 paddd_r2r (mm1, mm0);
00085 psrld_i2r (16, mm0);
00086
00087
00088
00089
00090
00091
00092
00093 movq_m2r (prevXY,mm1);
00094 pcmpgtd_r2r (mm0, mm1);
00095
00096
00097
00098 #ifdef STRICT_COMPAT
00099 movq_r2r (mm1,mm2);
00100 punpckhdq_r2r (mm2,mm2);
00101 punpckldq_r2r (mm1,mm1);
00102 pand_r2r (mm2, mm0);
00103 #endif
00104 pand_r2r (mm1, mm0);
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114 __asm__ __volatile__ (
00115 "movd %%mm0,%%ecx\n"
00116 "movq %%mm0,%%mm1\n"
00117
00118 "andl $15,%%ecx\n"
00119 "psrlq $32,%%mm1\n"
00120
00121 "shll $6,%%ecx\n"
00122 "movd %%mm1,%%eax\n"
00123
00124 "addl %0,%%ecx\n"
00125 "andl $15,%%eax\n"
00126
00127 "movd (%%ecx,%%eax,4),%%mm3\n"
00128
00129 ::"m"(precalCoef):"eax","ecx");
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160 psrld_i2r (PERTEDEC,mm0);
00161 psrld_i2r (PERTEDEC,mm1);
00162 __asm__ __volatile__ (
00163 "movd %%mm1,%%eax\n"
00164 "movq %%mm3,%%mm5\n"
00165
00166 "mull %1\n"
00167 "movd %%mm0,%%ecx\n"
00168 "punpcklbw %%mm5, %%mm3\n"
00169
00170 "addl %%ecx,%%eax\n"
00171 "movq %%mm3,%%mm4\n"
00172 "movq %%mm3,%%mm5\n"
00173
00174 "movl %0,%%ecx\n"
00175 "punpcklbw %%mm5,%%mm3\n"
00176
00177 "movq (%%ecx,%%eax,4),%%mm0\n"
00178 "punpckhbw %%mm5,%%mm4\n"
00179
00180 "addl %1,%%eax\n"
00181 "movq (%%ecx,%%eax,4),%%mm2\n"
00182
00183 : : "X"(expix1), "X"(prevX):"eax","ecx"
00184 );
00185
00186
00187
00188
00189
00190
00191
00192
00193 movq_r2r (mm0, mm1);
00194
00195
00196 punpcklbw_r2r (mm7, mm0);
00197
00198
00199
00200 movq_r2r (mm3, mm5);
00201
00202
00203 punpckhbw_r2r (mm7, mm1);
00204
00205 punpcklbw_r2r (mm7, mm5);
00206 punpckhbw_r2r (mm7, mm3);
00207
00208
00209 pmullw_r2r (mm5, mm0);
00210 pmullw_r2r (mm3, mm1);
00211 paddw_r2r (mm1, mm0);
00212
00213
00214 movq_r2r (mm4, mm5);
00215 punpcklbw_r2r (mm7, mm4);
00216 punpckhbw_r2r (mm7, mm5);
00217
00218
00219 movq_r2r (mm2, mm1);
00220
00221
00222 punpcklbw_r2r (mm7, mm1);
00223 punpckhbw_r2r (mm7, mm2);
00224
00225
00226 pmullw_r2r (mm4, mm1);
00227 pmullw_r2r (mm5, mm2);
00228
00229
00230 paddw_r2r (mm1, mm0);
00231 paddw_r2r (mm2, mm0);
00232
00233
00234 psrlw_i2r (8, mm0);
00235 packuswb_r2r (mm7, mm0);
00236
00237 movd_r2m (mm0,expix2[loop]);
00238
00239 ++loop;
00240 }
00241 #ifdef HAVE_ATHLON
00242 __asm__ __volatile__ ("femms\n");
00243 #else
00244 emms();
00245 #endif
00246 }
00247 #else
/*
 * Fallback used when the MMX implementation is not compiled in:
 * the extended-MMX zoom filter is never available, so always return 0.
 */
int zoom_filter_xmmx_supported (void) {
    return 0;
}
/*
 * Fallback used when the MMX implementation is not compiled in.
 *
 * Does nothing: no buffer is read or written.  The signature matches the
 * MMX implementation so callers link in either configuration; every
 * parameter is explicitly discarded to avoid unused-parameter warnings.
 */
void zoom_filter_xmmx (int prevX, int prevY,
                       unsigned int *expix1, unsigned int *expix2,
                       int *lbruS, int *lbruD, int buffratio,
                       int precalCoef[16][16])
{
    (void) prevX;
    (void) prevY;
    (void) expix1;
    (void) expix2;
    (void) lbruS;
    (void) lbruD;
    (void) buffratio;
    (void) precalCoef;
}
00262 #endif