00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <stdlib.h>
00024 #include <stdio.h>
00025
00026 #include "config.h"
00027 #ifdef HAVE_STDINT_H
00028 #include <stdint.h>
00029 #endif
00030 #include <inttypes.h>
00031
00032 #include <string.h>
00033 #include <math.h>
00034
00035 #include "filter.h"
00036 #include "frame.h"
00037
/*
 * Small arithmetic helpers.  They are plain macros, so each argument may be
 * evaluated more than once: only pass expressions without side effects.
 */
#define MIN(a,b) ((b) < (a) ? (b) : (a))
#define MAX(a,b) ((b) > (a) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : -(a))

/* Three-operand variants built from the two-operand ones. */
#define MIN3(a,b,c) MIN(MIN(a,b),c)
#define MAX3(a,b,c) MAX(MAX(a,b),c)
00044
00045 #ifdef MMX
00046 #include "dsputil.h"
00047 #include "i386/mmx.h"
00048 #endif
00049
00050 #include "aclib.h"
00051
/*
 * Copy routine used for all bulk picture copies in this file.  It is NULL
 * until YadifDeintFilter() assigns either plain memcpy or a CPU-specific
 * variant.
 */
static void *(*fast_memcpy)(void *to, const void *from, size_t len);
00053
00054 typedef struct ThisFilter
00055 {
00056 VideoFilter vf;
00057
00058 long long last_framenr;
00059
00060 uint8_t *ref[4][3];
00061 int stride[3];
00062 int8_t got_frames[4];
00063
00064 void (*filter_line)(struct ThisFilter *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
00065
00066 int mode;
00067 int width;
00068 int height;
00069
00070 int mm_flags;
00071 TF_STRUCT;
00072 } ThisFilter;
00073
00074
00075 static void AllocFilter(ThisFilter* filter, int width, int height)
00076 {
00077 int i,j;
00078 if ((width != filter->width) || height != filter->height)
00079 {
00080 printf("yadifdeint: size changed from %d x %d -> %d x %d\n", filter->width, filter->height, width, height);
00081 for (i=0; i<3*3; i++)
00082 {
00083 uint8_t **p= &filter->ref[i%3][i/3];
00084 if (*p) free(*p - 3*filter->stride[i/3]);
00085 *p= NULL;
00086 }
00087 for (i=0; i<3; i++)
00088 {
00089 int is_chroma= !!i;
00090 int w= ((width + 31) & (~31))>>is_chroma;
00091 int h= ((height+6+ 31) & (~31))>>is_chroma;
00092
00093 filter->stride[i]= w;
00094 for (j=0; j<3; j++)
00095 {
00096
00097 filter->ref[j][i]= (uint8_t*)calloc(w*h*sizeof(uint8_t),1)+3*w;
00098 }
00099 }
00100 filter->width = width;
00101 filter->height = height;
00102 memset(filter->got_frames, 0, sizeof(filter->got_frames));
00103 }
00104 }
00105
/*
 * Copy a picture plane of `height` rows from src to dst and return dst.
 *
 * When limit2width is zero and both strides are equal, the plane is copied
 * with a single fast_memcpy() of stride*height bytes (for a negative stride
 * both pointers are first moved to the lowest-addressed row).  Otherwise
 * exactly bytesPerLine bytes are copied per row.
 */
static inline void * memcpy_pic2(void * dst, const void * src,
                                 int bytesPerLine, int height,
                                 int dstStride, int srcStride, int limit2width)
{
    void *const result = dst;
    const uint8_t *in  = (const uint8_t *)src;
    uint8_t       *out = (uint8_t *)dst;
    int row;

    if (limit2width || dstStride != srcStride)
    {
        /* Row-by-row path: strides differ or the caller wants exact widths. */
        for (row = 0; row < height; row++)
        {
            fast_memcpy(out, in, bytesPerLine);
            in  += srcStride;
            out += dstStride;
        }
        return result;
    }

    /* Contiguous path: one copy covering every row including padding. */
    if (srcStride < 0)
    {
        in  += (height-1)*srcStride;
        out += (height-1)*dstStride;
        srcStride = -srcStride;
    }
    fast_memcpy(out, in, srcStride*height);

    return result;
}
/* Convenience form of memcpy_pic2() with limit2width disabled. */
#define memcpy_pic(d, s, b, h, ds, ss) memcpy_pic2(d, s, b, h, ds, ss, 0)
00136
00137 static void store_ref(struct ThisFilter *p, uint8_t *src, int src_offsets[3], int src_stride[3], int width, int height)
00138 {
00139 int i;
00140
00141 memcpy (p->ref[3], p->ref[0], sizeof(uint8_t *)*3);
00142 memmove(p->ref[0], p->ref[1], sizeof(uint8_t *)*3*3);
00143
00144 memcpy (&p->got_frames[3], &p->got_frames[0], sizeof(uint8_t));
00145 memmove(&p->got_frames[0], &p->got_frames[1], sizeof(uint8_t) * 3);
00146
00147 for (i=0; i<3; i++)
00148 {
00149 int is_chroma= !!i;
00150 memcpy_pic(p->ref[2][i], src + src_offsets[i], width>>is_chroma, height>>is_chroma, p->stride[i], src_stride[i]);
00151 }
00152 p->got_frames[2] = 1;
00153 }
00154
00155
#if defined(MMX)

/* Load 4 bytes from mem and widen them to 4 words; mm7 must hold zero. */
#define LOAD4(mem,dst) \
            "movd "mem", "#dst" \n\t"\
            "punpcklbw %%mm7, "#dst" \n\t"

/* dst = max(dst, -dst) on signed words, i.e. the absolute value; tmp is clobbered. */
#define PABS(tmp,dst) \
            "pxor "#tmp", "#tmp" \n\t"\
            "psubw "#dst", "#tmp" \n\t"\
            "pmaxsw "#tmp", "#dst" \n\t"

/*
 * Score one diagonal direction (same role as CHECK(j) in filter_line_c):
 * leaves the candidate prediction in mm5 and its score in mm2.
 */
#define CHECK(pj,mj) \
            "movq "#pj"(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1+j] */\
            "movq "#mj"(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1-j] */\
            "movq %%mm2, %%mm4 \n\t"\
            "movq %%mm2, %%mm5 \n\t"\
            "pxor %%mm3, %%mm4 \n\t"\
            "pavgb %%mm3, %%mm5 \n\t"\
            "pand %[pb1], %%mm4 \n\t"\
            "psubusb %%mm4, %%mm5 \n\t"\
            "psrlq $8, %%mm5 \n\t"\
            "punpcklbw %%mm7, %%mm5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
            "movq %%mm2, %%mm4 \n\t"\
            "psubusb %%mm3, %%mm2 \n\t"\
            "psubusb %%mm4, %%mm3 \n\t"\
            "pmaxub %%mm3, %%mm2 \n\t"\
            "movq %%mm2, %%mm3 \n\t"\
            "movq %%mm2, %%mm4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
            "psrlq $8, %%mm3 \n\t" /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
            "psrlq $16, %%mm4 \n\t" /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
            "punpcklbw %%mm7, %%mm2 \n\t"\
            "punpcklbw %%mm7, %%mm3 \n\t"\
            "punpcklbw %%mm7, %%mm4 \n\t"\
            "paddw %%mm3, %%mm2 \n\t"\
            "paddw %%mm4, %%mm2 \n\t" /* score */

/* Accept the candidate where it beats the best score so far; mask kept in mm6. */
#define CHECK1 \
            "movq %%mm0, %%mm3 \n\t"\
            "pcmpgtw %%mm2, %%mm3 \n\t" /* if(score < spatial_score) */\
            "pminsw %%mm2, %%mm0 \n\t" /* spatial_score= score; */\
            "movq %%mm3, %%mm6 \n\t"\
            "pand %%mm3, %%mm5 \n\t"\
            "pandn %%mm1, %%mm3 \n\t"\
            "por %%mm5, %%mm3 \n\t"\
            "movq %%mm3, %%mm1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */

/*
 * Second step in a direction: a large penalty is added to the score wherever
 * the first step (mask in mm6) did not improve, so the second step can only
 * win where the first one did.  The body must be attached to the #define by
 * the line continuation; without it the macro is empty and the strings below
 * end up at file scope.
 */
#define CHECK2 \
            "paddw %[pw1], %%mm6 \n\t"\
            "psllw $14, %%mm6 \n\t"\
            "paddsw %%mm6, %%mm2 \n\t"\
            "movq %%mm0, %%mm3 \n\t"\
            "pcmpgtw %%mm2, %%mm3 \n\t"\
            "pminsw %%mm2, %%mm0 \n\t"\
            "pand %%mm3, %%mm5 \n\t"\
            "pandn %%mm1, %%mm3 \n\t"\
            "por %%mm5, %%mm3 \n\t"\
            "movq %%mm3, %%mm1 \n\t"

/*
 * MMX implementation of filter_line_c(): same parameters, four output pixels
 * per loop iteration.  Each iteration stores 4 bytes at dst, so when w is not
 * a multiple of 4 up to 3 bytes beyond dst[w-1] are written.
 * NOTE(review): the kernel uses pshufw/pavgb/pmaxub/pminsw/pmaxsw, which go
 * beyond baseline MMX - confirm the caller only selects it on capable CPUs.
 */
static void filter_line_mmx2(struct ThisFilter *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity)
{
    static const uint64_t pw_1 = 0x0001000100010001ULL;
    static const uint64_t pb_1 = 0x0101010101010101ULL;
    const int mode = p->mode;
    uint64_t tmp0, tmp1, tmp2, tmp3;
    int x;

#define FILTER\
    for (x=0; x<w; x+=4){\
        asm volatile(\
            "pxor %%mm7, %%mm7 \n\t"\
            LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[x-refs] */\
            LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[x+refs] */\
            LOAD4("(%["prev2"])", %%mm2) /* prev2[x] */\
            LOAD4("(%["next2"])", %%mm3) /* next2[x] */\
            "movq %%mm3, %%mm4 \n\t"\
            "paddw %%mm2, %%mm3 \n\t"\
            "psraw $1, %%mm3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
            "movq %%mm0, %[tmp0] \n\t" /* c */\
            "movq %%mm3, %[tmp1] \n\t" /* d */\
            "movq %%mm1, %[tmp2] \n\t" /* e */\
            "psubw %%mm4, %%mm2 \n\t"\
            PABS( %%mm4, %%mm2) /* temporal_diff0 */\
            LOAD4("(%[prev],%[mrefs])", %%mm3) /* prev[x-refs] */\
            LOAD4("(%[prev],%[prefs])", %%mm4) /* prev[x+refs] */\
            "psubw %%mm0, %%mm3 \n\t"\
            "psubw %%mm1, %%mm4 \n\t"\
            PABS( %%mm5, %%mm3)\
            PABS( %%mm5, %%mm4)\
            "paddw %%mm4, %%mm3 \n\t" /* temporal_diff1 */\
            "psrlw $1, %%mm2 \n\t"\
            "psrlw $1, %%mm3 \n\t"\
            "pmaxsw %%mm3, %%mm2 \n\t"\
            LOAD4("(%[next],%[mrefs])", %%mm3) /* next[x-refs] */\
            LOAD4("(%[next],%[prefs])", %%mm4) /* next[x+refs] */\
            "psubw %%mm0, %%mm3 \n\t"\
            "psubw %%mm1, %%mm4 \n\t"\
            PABS( %%mm5, %%mm3)\
            PABS( %%mm5, %%mm4)\
            "paddw %%mm4, %%mm3 \n\t" /* temporal_diff2 */\
            "psrlw $1, %%mm3 \n\t"\
            "pmaxsw %%mm3, %%mm2 \n\t"\
            "movq %%mm2, %[tmp3] \n\t" /* diff */\
\
            "paddw %%mm0, %%mm1 \n\t"\
            "paddw %%mm0, %%mm0 \n\t"\
            "psubw %%mm1, %%mm0 \n\t"\
            "psrlw $1, %%mm1 \n\t" /* spatial_pred */\
            PABS( %%mm2, %%mm0) /* ABS(c-e) */\
\
            "movq -1(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1] */\
            "movq -1(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1] */\
            "movq %%mm2, %%mm4 \n\t"\
            "psubusb %%mm3, %%mm2 \n\t"\
            "psubusb %%mm4, %%mm3 \n\t"\
            "pmaxub %%mm3, %%mm2 \n\t"\
            "pshufw $9,%%mm2, %%mm3 \n\t"\
            "punpcklbw %%mm7, %%mm2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
            "punpcklbw %%mm7, %%mm3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
            "paddw %%mm2, %%mm0 \n\t"\
            "paddw %%mm3, %%mm0 \n\t"\
            "psubw %[pw1], %%mm0 \n\t" /* spatial_score */\
\
            CHECK(-2,0)\
            CHECK1\
            CHECK(-3,1)\
            CHECK2\
            CHECK(0,-2)\
            CHECK1\
            CHECK(1,-3)\
            CHECK2\
\
            /* the block up to label 1 is skipped when mode >= 2 */\
            "movq %[tmp3], %%mm6 \n\t" /* diff */\
            "cmp $2, %[mode] \n\t"\
            "jge 1f \n\t"\
            LOAD4("(%["prev2"],%[mrefs],2)", %%mm2) /* prev2[x-2*refs] */\
            LOAD4("(%["next2"],%[mrefs],2)", %%mm4) /* next2[x-2*refs] */\
            LOAD4("(%["prev2"],%[prefs],2)", %%mm3) /* prev2[x+2*refs] */\
            LOAD4("(%["next2"],%[prefs],2)", %%mm5) /* next2[x+2*refs] */\
            "paddw %%mm4, %%mm2 \n\t"\
            "paddw %%mm5, %%mm3 \n\t"\
            "psrlw $1, %%mm2 \n\t" /* b */\
            "psrlw $1, %%mm3 \n\t" /* f */\
            "movq %[tmp0], %%mm4 \n\t" /* c */\
            "movq %[tmp1], %%mm5 \n\t" /* d */\
            "movq %[tmp2], %%mm7 \n\t" /* e */\
            "psubw %%mm4, %%mm2 \n\t" /* b-c */\
            "psubw %%mm7, %%mm3 \n\t" /* f-e */\
            "movq %%mm5, %%mm0 \n\t"\
            "psubw %%mm4, %%mm5 \n\t" /* d-c */\
            "psubw %%mm7, %%mm0 \n\t" /* d-e */\
            "movq %%mm2, %%mm4 \n\t"\
            "pminsw %%mm3, %%mm2 \n\t"\
            "pmaxsw %%mm4, %%mm3 \n\t"\
            "pmaxsw %%mm5, %%mm2 \n\t"\
            "pminsw %%mm5, %%mm3 \n\t"\
            "pmaxsw %%mm0, %%mm2 \n\t" /* max */\
            "pminsw %%mm0, %%mm3 \n\t" /* min */\
            "pxor %%mm4, %%mm4 \n\t"\
            "pmaxsw %%mm3, %%mm6 \n\t"\
            "psubw %%mm2, %%mm4 \n\t" /* -max */\
            "pmaxsw %%mm4, %%mm6 \n\t" /* diff= MAX3(diff, min, -max); */\
            "1: \n\t"\
\
            "movq %[tmp1], %%mm2 \n\t" /* d */\
            "movq %%mm2, %%mm3 \n\t"\
            "psubw %%mm6, %%mm2 \n\t" /* d-diff */\
            "paddw %%mm6, %%mm3 \n\t" /* d+diff */\
            "pmaxsw %%mm2, %%mm1 \n\t"\
            "pminsw %%mm3, %%mm1 \n\t" /* clip(spatial_pred, d-diff, d+diff) */\
            "packuswb %%mm1, %%mm1 \n\t"\
\
            :[tmp0]"=m"(tmp0),\
             [tmp1]"=m"(tmp1),\
             [tmp2]"=m"(tmp2),\
             [tmp3]"=m"(tmp3)\
            :[prev] "r"(prev),\
             [cur] "r"(cur),\
             [next] "r"(next),\
             [prefs]"r"((long)refs),\
             [mrefs]"r"((long)-refs),\
             [pw1] "m"(pw_1),\
             [pb1] "m"(pb_1),\
             [mode] "g"(mode)\
        );\
        asm volatile("movd %%mm1, %0" :"=m"(*dst));\
        dst += 4;\
        prev+= 4;\
        cur += 4;\
        next+= 4;\
    }

    /* prev2/next2 select the temporal neighbours exactly as in filter_line_c. */
    if (parity)
    {
#define prev2 "prev"
#define next2 "cur"
        FILTER
#undef prev2
#undef next2
    }
    else
    {
#define prev2 "cur"
#define next2 "next"
        FILTER
#undef prev2
#undef next2
    }
}
#undef LOAD4
#undef PABS
#undef CHECK
#undef CHECK1
#undef CHECK2
#undef FILTER

#endif
00374
/*
 * Portable C kernel: interpolate one missing line of w pixels into dst.
 *
 * prev/cur/next point at the same line position in three stored pictures
 * and refs is the distance in bytes between two adjacent rows.  parity
 * selects which pair of pictures acts as the temporal neighbours (prev2 and
 * next2).  For every pixel the best of five edge-directed spatial
 * predictions is chosen and then limited to d +/- diff, where d is the
 * temporal average and diff is derived from temporal differences.
 * The p argument is not used by this implementation.
 */
static void filter_line_c(struct ThisFilter *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity)
{
    uint8_t *prev2 = parity ? prev : cur;
    uint8_t *next2 = parity ? cur  : next;
    int x;

    for (x = 0; x < w; x++)
    {
        const uint8_t *above = cur + x - refs;   /* row above the missing pixel */
        const uint8_t *below = cur + x + refs;   /* row below the missing pixel */
        int c = above[0];
        int d = (prev2[x] + next2[x])>>1;
        int e = below[0];
        int temporal_diff0 = ABS(prev2[x] - next2[x]);
        int temporal_diff1 = ( ABS(prev[x-refs] - c) + ABS(prev[x+refs] - e) )>>1;
        int temporal_diff2 = ( ABS(next[x-refs] - c) + ABS(next[x+refs] - e) )>>1;
        int diff = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
        int spatial_pred  = (c+e)>>1;
        int spatial_score = ABS(above[-1] - below[-1]) + ABS(c-e)
                          + ABS(above[+1] - below[+1]) - 1;
        int dir, step;

        /*
         * Try the diagonals -1, -2 and then +1, +2.  The second step in a
         * direction is only examined when the first step improved the score.
         */
        for (dir = -1; dir <= 1; dir += 2)
        {
            for (step = 1; step <= 2; step++)
            {
                const int j = dir * step;
                int score = ABS(above[-1+j] - below[-1-j])
                          + ABS(above[   j] - below[  -j])
                          + ABS(above[+1+j] - below[+1-j]);
                if (score >= spatial_score)
                    break;
                spatial_score = score;
                spatial_pred  = (above[j] + below[-j])>>1;
            }
        }

        /* Widen the allowed range using the rows two lines above and below. */
        {
            int b = (prev2[x-2*refs] + next2[x-2*refs])>>1;
            int f = (prev2[x+2*refs] + next2[x+2*refs])>>1;
            int max = MAX3(d-e, d-c, MIN(b-c, f-e));
            int min = MIN3(d-e, d-c, MAX(b-c, f-e));

            diff = MAX3(diff, min, -max);
        }

        /* Keep the spatial prediction within d +/- diff. */
        if (spatial_pred > d + diff)
            spatial_pred = d + diff;
        else if (spatial_pred < d - diff)
            spatial_pred = d - diff;

        dst[x] = spatial_pred;
    }
}
00436
00437 static void filter_func(struct ThisFilter *p, uint8_t *dst, int dst_offsets[3], int dst_stride[3], int width, int height, int parity, int tff)
00438 {
00439 int y, i;
00440
00441 uint8_t nr_p, nr_c, nr_n;
00442
00443
00444 nr_n = 2;
00445 nr_c = p->got_frames[1]?1:nr_n;
00446 nr_p = p->got_frames[0]?0:nr_c;
00447
00448 for (i=0; i<3; i++)
00449 {
00450 int is_chroma= !!i;
00451 int w= width >>is_chroma;
00452 int h= height>>is_chroma;
00453 int refs= p->stride[i];
00454
00455 for (y=0; y<h; y++)
00456 {
00457 if ((y ^ parity) & 1)
00458 {
00459 uint8_t *prev= &p->ref[nr_p][i][y*refs];
00460 uint8_t *cur = &p->ref[nr_c][i][y*refs];
00461 uint8_t *next= &p->ref[nr_n][i][y*refs];
00462 uint8_t *dst2= dst + dst_offsets[i] + y*dst_stride[i];
00463 p->filter_line(p, dst2, prev, cur, next, w, refs, parity ^ tff);
00464 }
00465 else
00466 {
00467 fast_memcpy(dst + dst_offsets[i] + y*dst_stride[i], &p->ref[nr_c][i][y*refs], w);
00468 }
00469 }
00470 }
00471 #ifdef MMX
00472 emms();
00473 #endif
00474 }
00475
00476
00477 static int YadifDeint (VideoFilter * f, VideoFrame * frame)
00478 {
00479 ThisFilter *filter = (ThisFilter *) f;
00480 TF_VARS;
00481
00482 int second_field = 0;
00483 AllocFilter(filter, frame->width, frame->height);
00484
00485
00486
00487 if (filter->last_framenr != frame->frameNumber)
00488 {
00489 if (filter->last_framenr != (frame->frameNumber - 1))
00490 {
00491
00492 memset(filter->got_frames, 0, sizeof(filter->got_frames));
00493 }
00494 store_ref(filter, frame->buf, frame->offsets, frame->pitches, frame->width, frame->height);
00495 second_field = 0;
00496 }
00497 else
00498 {
00499 second_field = 1;
00500 }
00501
00502
00503 filter_func(
00504 filter, frame->buf, frame->offsets, frame->pitches,
00505 frame->width, frame->height, second_field, frame->top_field_first);
00506
00507 filter->last_framenr = frame->frameNumber;
00508
00509 return 0;
00510 }
00511
00512
00513 void CleanupYadifDeintFilter (VideoFilter * filter)
00514 {
00515 int i;
00516 ThisFilter* f = (ThisFilter*)filter;
00517 for (i=0; i<3*3; i++)
00518 {
00519 uint8_t **p= &f->ref[i%3][i/3];
00520 if (*p) free(*p - 3*f->stride[i/3]);
00521 *p= NULL;
00522 }
00523 }
00524
00525 VideoFilter * YadifDeintFilter (VideoFrameType inpixfmt, VideoFrameType outpixfmt,
00526 int *width, int *height, char *options)
00527 {
00528 ThisFilter *filter;
00529 (void) height;
00530 (void) options;
00531
00532 fprintf(stderr, "Initialize Yadif Deinterlacer. In-Pixformat = %d Out-Pixformat=%d\n", inpixfmt, outpixfmt);
00533 filter = (ThisFilter *) malloc (sizeof(ThisFilter));
00534 if (filter == NULL)
00535 {
00536 fprintf (stderr, "YadifDeint: failed to allocate memory for filter.\n");
00537 return NULL;
00538 }
00539
00540 filter->width = 0;
00541 filter->height = 0;
00542 filter->mode = 1;
00543 memset(filter->ref, 0, sizeof(filter->ref));
00544
00545 AllocFilter(filter, *width, *height);
00546
00547 #ifdef MMX
00548 filter->mm_flags = mm_support();
00549 TF_INIT(filter);
00550 #else
00551 filter->mm_flags = 0;
00552 #endif
00553
00554 filter->filter_line = filter_line_c;
00555 #ifdef MMX
00556 if (filter->mm_flags & MM_MMX)
00557 {
00558 filter->filter_line = filter_line_mmx2;
00559 }
00560
00561 if (filter->mm_flags & MM_SSE2)
00562 fast_memcpy=fast_memcpy_SSE;
00563 else if (filter->mm_flags & MM_MMXEXT)
00564 fast_memcpy=fast_memcpy_MMX2;
00565 else if (filter->mm_flags & MM_3DNOW)
00566 fast_memcpy=fast_memcpy_3DNow;
00567 else if (filter->mm_flags & MM_MMX)
00568 fast_memcpy=fast_memcpy_MMX;
00569 else
00570 #endif
00571 fast_memcpy=memcpy;
00572
00573
00574
00575
00576 filter->vf.filter = &YadifDeint;
00577 filter->vf.cleanup = &CleanupYadifDeintFilter;
00578 return (VideoFilter *) filter;
00579 }
00580
00581
00582 static FmtConv FmtList[] =
00583 {
00584 { FMT_YV12, FMT_YV12 } ,
00585 FMT_NULL
00586 };
00587
00588 FilterInfo filter_table[] =
00589 {
00590 {
00591 symbol: "YadifDeintFilter",
00592 name: "yadifdeint",
00593 descript: "combines data from several fields to deinterlace with less motion blur",
00594 formats: FmtList,
00595 libname: NULL
00596 },
00597 {
00598 symbol: "YadifDeintFilter",
00599 name: "yadifdoubleprocessdeint",
00600 descript: "combines data from several fields to deinterlace with less motion blur",
00601 formats: FmtList,
00602 libname: NULL
00603 },FILT_NULL
00604 };
00605
00606