/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
 * Predictive motion estimation ("pmvfast") back-end for fame_motion_t.
 *
 * The file installs init/close/enter/leave/estimation callbacks on a
 * fame_motion_t object and implements a candidate-vector test followed by
 * table-driven diamond / gradient-descent refinement on 8x8 block errors.
 *
 * NOTE(review): this copy of the file appears to have been damaged by a
 * text-extraction step: wherever the original had '<' immediately followed
 * by an identifier, everything up to the next '>' seems to be missing.
 * Each affected spot is marked with a NOTE(review) below.  The file cannot
 * be built as-is; restore the marked spans from the upstream source rather
 * than guessing.
 */

/* NOTE(review): the three header names below are missing (see file note).
   The trailing comment indicates the third one provided memcpy/memset,
   i.e. presumably <string.h>; FILE/fprintf are used under DEBUG/STAT, so
   <stdio.h> is presumably one of the others -- confirm against upstream. */
#include
#include
#include /* memcpy, memset */
#include "fame.h"
#include "fame_malloc.h"
#include "fame_motion.h"
#include "fame_motion_pmvfast.h"
#ifdef HAS_MMX
#include "mad_mmx.h"
#else
#include "mad_int.h"
#endif

/* DEBUG and STAT are forced off here, so every "#if DEBUG" / "#if STAT"
   section below evaluates to 0 and is compiled out.
   NOTE(review): the FILE* declarations use "#ifdef" while all uses are
   guarded with "#if"; defining either macro to 0 would declare the handle
   but compile out its uses. */
#undef DEBUG
#undef STAT
#define COMPENSATE_MV_OVERHEAD

#ifdef COMPENSATE_MV_OVERHEAD
/* Per-magnitude cost table indexed 0..32 (33 entries); mv_overhead() clamps
   its index to 32.  Presumably these are motion-vector VLC code lengths in
   bits -- TODO confirm against the bitstream tables. */
static const int mv_length[] = {
  0, 2, 3, 4, 6, 7, 7, 7,
  9, 9, 9, 10, 10, 10, 10, 10,
  10, 10, 10, 10, 10, 10, 10, 10,
  10, 11, 11, 11, 11, 11, 11, 12, 12
};

/* Multiplier applied to the overhead returned by mv_overhead(). */
#define COMPENSATE_LAMBA 1

/* TODO: lookup table */
/*
 * mv_overhead: penalty added to a block error for choosing vector (dx,dy)
 * when the predictor is *pmv.
 *
 *   pmv    predictor vector; only its dx/dy fields are read
 *   dx,dy  candidate vector
 *   fcode  scaling code; used as shift amount (fcode - 1), so callers must
 *          pass fcode >= 1
 *   quant  quantiser, used as a linear weight
 *
 * For each component the difference to the predictor is taken; a zero
 * difference costs 1, otherwise the absolute difference is rounded up to a
 * multiple of 2^(fcode-1), shifted down, clamped to 32 and costs
 * mv_length[] + 1 + (fcode - 1).
 * Returns COMPENSATE_LAMBA * size * quant.
 */
static inline int mv_overhead(fame_motion_vector_t *pmv,
                              int dx,
                              int dy,
                              const int fcode,
                              const int quant)
{
  int size = 0;
  int pdx, pdy;

  pdx = dx - pmv->dx;
  pdy = dy - pmv->dy;

  if(pdx != 0) {
    if (pdx < 0) pdx = -pdx;
    pdx += (1 << (fcode - 1)) - 1;   /* round up before scaling */
    pdx >>= (fcode - 1);
    if (pdx > 32) pdx = 32;          /* keep the index inside mv_length[] */
    size += mv_length[pdx] + 1 + fcode - 1;
  } else
    size ++;

  if(pdy != 0) {
    if (pdy < 0) pdy = -pdy;
    pdy += (1 << (fcode - 1)) - 1;   /* round up before scaling */
    pdy >>= (fcode - 1);
    if (pdy > 32) pdy = 32;          /* keep the index inside mv_length[] */
    size += mv_length[pdy] + 1 + fcode - 1;
  } else
    size ++;

  return(COMPENSATE_LAMBA*size*quant);
}
#else
/* Overhead compensation disabled: every vector costs nothing extra. */
#define mv_overhead(pmv, dx, dy, fcode, quant) 0
#endif

#ifdef DEBUG
static FILE *debug_log;
#endif

#ifdef STAT
static FILE *stat_log;
#endif

/* Counters updated only inside "#if STAT" sections.  The object itself is
   defined unconditionally and has external linkage (no "static"). */
typedef struct
{
  int num_picture;        /* incremented once per pmvfast_leave() */
  int num_intra;          /* reset in pmvfast_enter(); not updated in the visible code */
  int num_median;         /* predicted vector kept without search */
  int num_early;          /* early exits after the candidate set */
  int num_16x16;          /* reset in pmvfast_enter(); not updated in the visible code */
  int num_8x8;            /* reset in pmvfast_enter(); not updated in the visible code */
  int num_largediamond;   /* printed in pmvfast_leave() */
  int num_smalldiamond;   /* printed in pmvfast_leave() */
  int num_eval;           /* number of eval_error() calls counted */
} stat_motion_pmvfast;

stat_motion_pmvfast motionstat;

/********************  PUBLIC DEFINITIONS ********************/
static void pmvfast_init(fame_motion_t *motion,
                         int mb_width,
                         int mb_height,
                         unsigned int flags);
static void pmvfast_close(fame_motion_t *motion);
static void pmvfast_enter(fame_motion_t *motion,
                          fame_yuv_t **ref,
                          fame_yuv_t *current,
                          unsigned char *shape,
                          int search_range);
static fame_motion_coding_t pmvfast_estimation(fame_motion_t *motion,
                                               int mb_x,
                                               int mb_y,
                                               fame_motion_vector_t *vectors,
                                               unsigned char quant);
static void pmvfast_leave(fame_motion_t *motion);

/*
 * Constructor: runs the base fame_motion_t constructor, then saves the base
 * init/close/enter/leave callbacks in the FAME_OVERLOADED slots and replaces
 * them with the pmvfast versions (each of which calls the saved callback
 * first).  estimation is replaced without saving the previous value.
 */
FAME_CONSTRUCTOR(fame_motion_pmvfast_t)
{
  fame_motion_t_constructor(FAME_MOTION(this));
  FAME_OBJECT(this)->name = "predictive motion estimation";
  this->FAME_OVERLOADED(init) = FAME_MOTION(this)->init;
  FAME_MOTION(this)->init = pmvfast_init;
  this->FAME_OVERLOADED(close) = FAME_MOTION(this)->close;
  FAME_MOTION(this)->close = pmvfast_close;
  this->FAME_OVERLOADED(enter) = FAME_MOTION(this)->enter;
  FAME_MOTION(this)->enter = pmvfast_enter;
  this->FAME_OVERLOADED(leave) = FAME_MOTION(this)->leave;
  FAME_MOTION(this)->leave = pmvfast_leave;
  FAME_MOTION(this)->estimation = pmvfast_estimation;
  return(this);
}

/********************  PRIVATE DEFINITIONS ********************/

/* Median of three values.  The expansion is not parenthesised as a whole
   and evaluates its arguments several times, so wrap uses in parentheses
   and pass side-effect-free arguments.  Not used in the visible code. */
#define MEDIAN(a,b,c) ((b)<(a))?(((c)>(a))?(a):(((c)<(b))?(b):(c))):(((c)<(a))?(a):(((c)>(b))?(b):(c)))

/* One search step: displacement plus the 1-based table index to continue
   from if this step turns out to be the best one. */
typedef struct
{
  int dx;
  int dy;
  int index_direction; /* the index of the direction : index = index(dx,dy) */
} direction_t;

/* A list of "nbre" search steps. */
typedef struct
{
  int nbre;
  direction_t *directions;
} tab_direction_t;

/* NULL_MOTION, MOTION_INTRA, MOTION_INTER, SIZE_MB, SIZE_SB and THRESHOLD8x8
   are not referenced in the visible code.  THRESHOLD0,
   LOWER_LIMIT_THRESHOLD1 and THRESHOLD_SMALLDIAMOND are also unreferenced
   here, but presumably were used inside the spans that are missing from
   pmvfast_estimation() -- confirm against upstream. */
#define NULL_MOTION 1
#define INFINITE_ERROR 0xFFFFU   /* error assigned to out-of-window candidates */

#define MOTION_INTRA 0x0001
#define MOTION_INTER 0x0002

#define SIZE_MB 16
#define SIZE_SB 8

#define THRESHOLD0 256
#define LOWER_LIMIT_THRESHOLD1 512
#define UPPER_LIMIT_THRESHOLD1 1024    /* upper clamp for threshold1 */
#define UPPER_LIMIT_THRESHOLD2 1792    /* upper clamp for threshold2 */
#define THRESHOLD_SMALLDIAMOND 1536
#define THRESHOLD8x8 256

static inline void get_error(fame_yuv_t **ref,
                             unsigned char *current,
                             unsigned char *shape,
                             fame_motion_vector_t *vectors,
                             int offset[4],
                             int edged_offset[4],
                             int pitch,
                             compute_error_t eval_error,
                             int number);

static inline int check_vector(fame_yuv_t **ref,
                               unsigned char *current,
                               unsigned char *shape,
                               int x,
                               int y,
                               int width,
                               int height,
                               fame_motion_vector_t *pvector,
                               fame_motion_vector_t *vectors,
                               int offset[4],
                               int edged_offset[4],
                               int pitch,
                               compute_error_t eval_error,
                               fame_motion_vector_t *pmv,
                               int fcode,
                               int quant,
                               int unrestricted);

static inline int check_zero_vector(fame_yuv_t **ref,
                                    unsigned char *current,
                                    unsigned char *shape,
                                    int x,
                                    int y,
                                    int width,
                                    int height,
                                    fame_motion_vector_t *vectors,
                                    int offset[4],
                                    int edged_offset[4],
                                    int pitch,
                                    compute_error_t eval_error,
                                    fame_motion_vector_t *pmv,
                                    int fcode,
                                    int quant);

static void find_macroblockvector(fame_yuv_t **ref,
                                  unsigned char *current,
                                  unsigned char *shape,
                                  int offset[4],
                                  int edged_offset[4],
                                  int x,
                                  int y,
                                  int width,
                                  int height,
                                  int pitch,
                                  tab_direction_t *table,
                                  int search_range,
                                  int step,
                                  int count,
                                  compute_error_t eval_error,
                                  fame_motion_vector_t *mv,
                                  fame_motion_vector_t *pmv,
                                  int fcode,
                                  int quant,
                                  int unrestricted);

static void find_blockvector(fame_yuv_t **ref,
                             unsigned char *current,
                             unsigned char *shape,
                             int offset,
                             int edged_offset,
                             int x,
                             int y,
                             int width,
                             int height,
                             int pitch,
                             tab_direction_t *table,
                             int search_range,
                             int step,
                             int count,
                             compute_error_t eval_error,
                             fame_motion_vector_t *mv,
                             fame_motion_vector_t *pmv,
                             int fcode,
                             int quant,
                             int unrestricted);

/* motion tables */
/*
 * Each pattern below is an array of tab_direction_t indexed by the
 * index_direction of the step that won the previous iteration; the
 * find_*vector() routines start from entry [1] (all points except the
 * centre).  Entry [0] additionally contains the centre point.  Each
 * direction_t is { dx, dy, index_direction }.
 */

/* large diamond:
 *         4
 *       5   3
 *     6   1   2
 *       7   9
 *         8
 */
static direction_t td_block_largediamond0[9] = {
  { 0, 0, 1}, { 2, 0, 2}, { 1, 1, 3}, { 0, 2, 4}, {-1, 1, 5},
  {-2, 0, 6}, {-1,-1, 7}, { 0,-2, 8}, { 1,-1, 9}
};
static direction_t td_block_largediamond1[8] = {
  { 2, 0, 2}, { 1, 1, 3}, { 0, 2, 4}, {-1, 1, 5},
  {-2, 0, 6}, {-1,-1, 7}, { 0,-2, 8}, { 1,-1, 9}
};
static direction_t td_block_largediamond2[5] = {
  { 2, 0, 2}, { 1, 1, 3}, { 0, 2, 4}, { 0,-2, 8}, { 1,-1, 9}
};
static direction_t td_block_largediamond3[3] = {
  { 2, 0, 2}, { 1, 1, 3}, { 0, 2, 4}
};
static direction_t td_block_largediamond4[5] = {
  { 2, 0, 2}, { 1, 1, 3}, { 0, 2, 4}, {-1, 1, 5}, {-2, 0, 6}
};
static direction_t td_block_largediamond5[3] = {
  {-1, 1, 5}, {-2, 0, 6}, { 0, 2, 4}
};
static direction_t td_block_largediamond6[5] = {
  { 0, 2, 4}, {-1, 1, 5}, {-2, 0, 6}, {-1,-1, 7}, { 0,-2, 8}
};
static direction_t td_block_largediamond7[3] = {
  {-2, 0, 6}, {-1,-1, 7}, { 0,-2, 8}
};
static direction_t td_block_largediamond8[5] = {
  { 2, 0, 2}, {-2, 0, 6}, {-1,-1, 7}, { 0,-2, 8}, { 1,-1, 9}
};
static direction_t td_block_largediamond9[3] = {
  { 2, 0, 2}, { 0,-2, 8}, { 1,-1, 9}
};

static tab_direction_t td_block_largediamond[10] = {
  {9,td_block_largediamond0},
  {8,td_block_largediamond1},
  {5,td_block_largediamond2},
  {3,td_block_largediamond3},
  {5,td_block_largediamond4},
  {3,td_block_largediamond5},
  {5,td_block_largediamond6},
  {3,td_block_largediamond7},
  {5,td_block_largediamond8},
  {3,td_block_largediamond9}
};

/* small diamond:
 *       3
 *     4 1 2
 *       5
 */
static direction_t td_block_smalldiamond0[5] = {
  { 0, 0, 1}, { 1, 0, 2}, { 0, 1, 3}, {-1, 0, 4}, { 0,-1, 5}
};
static direction_t td_block_smalldiamond1[4] = {
  { 1, 0, 2}, { 0, 1, 3}, {-1, 0, 4}, { 0,-1, 5}
};
static direction_t td_block_smalldiamond2[3] = {
  { 1, 0, 2}, { 0, 1, 3}, { 0,-1, 5}
};
static direction_t td_block_smalldiamond3[3] = {
  { 1, 0, 2}, { 0, 1, 3}, {-1, 0, 4}
};
static direction_t td_block_smalldiamond4[3] = {
  { 0, 1, 3}, {-1, 0, 4}, { 0,-1, 5}
};
static direction_t td_block_smalldiamond5[3] = {
  { 1, 0, 2}, {-1, 0, 4}, { 0,-1, 5}
};

static tab_direction_t td_block_smalldiamond[6] = {
  {5,td_block_smalldiamond0},
  {4,td_block_smalldiamond1},
  {3,td_block_smalldiamond2},
  {3,td_block_smalldiamond3},
  {3,td_block_smalldiamond4},
  {3,td_block_smalldiamond5}
};

/* gradient descent:
 *     5 4 3
 *     6 1 2
 *     7 8 9
 */
static direction_t td_block_bbgds0[9] = {
  { 0, 0, 1}, { 1, 0, 2}, { 1, 1, 3}, { 0, 1, 4}, {-1, 1, 5},
  {-1, 0, 6}, {-1,-1, 7}, { 0,-1, 8}, { 1,-1, 9}
};
static direction_t td_block_bbgds1[8] = {
  { 1, 0, 2}, { 1, 1, 3}, { 0, 1, 4}, {-1, 1, 5},
  {-1, 0, 6}, {-1,-1, 7}, { 0,-1, 8}, { 1,-1, 9}
};
static direction_t td_block_bbgds2[3] = {
  { 1, 0, 2}, { 1, 1, 3}, { 1,-1, 9}
};
static direction_t td_block_bbgds3[5] = {
  { 1, 0, 2}, { 1, 1, 3}, { 0, 1, 4}, {-1, 1, 5}, { 1,-1, 9}
};
static direction_t td_block_bbgds4[3] = {
  { 1, 1, 3}, { 0, 1, 4}, {-1, 1, 5}
};
static direction_t td_block_bbgds5[5] = {
  { 1, 1, 3}, { 0, 1, 4}, {-1, 1, 5}, {-1, 0, 6}, {-1,-1, 7}
};
static direction_t td_block_bbgds6[3] = {
  {-1, 1, 5}, {-1, 0, 6}, {-1,-1, 7}
};
static direction_t td_block_bbgds7[5] = {
  {-1, 1, 5}, {-1, 0, 6}, {-1,-1, 7}, { 0,-1, 8}, { 1,-1, 9}
};
static direction_t td_block_bbgds8[3] = {
  {-1,-1, 7}, { 0,-1, 8}, { 1,-1, 9}
};
static direction_t td_block_bbgds9[5] = {
  { 1, 0, 2}, { 1, 1, 3}, {-1,-1, 7}, { 0,-1, 8}, { 1,-1, 9}
};

static tab_direction_t td_block_bbgds[10] = {
  {9,td_block_bbgds0},
  {8,td_block_bbgds1},
  {3,td_block_bbgds2},
  {5,td_block_bbgds3},
  {3,td_block_bbgds4},
  {5,td_block_bbgds5},
  {3,td_block_bbgds6},
  {5,td_block_bbgds7},
  {3,td_block_bbgds8},
  {5,td_block_bbgds9}
};

/********************  PUBLIC FUNCTIONS ********************/

/*
 * pmvfast_init: calls the saved base init, then allocates the two
 * per-picture vector buffers (4 vectors per macroblock: the size is
 * mb_width*2 * mb_height*2 entries).
 * NOTE(review): the fame_malloc() results are not checked here; whether
 * fame_malloc can return NULL is not visible in this file.
 */
static void pmvfast_init(fame_motion_t *motion,
                         int mb_width,
                         int mb_height,
                         unsigned int flags)
{
  FAME_MOTION_PMVFAST(motion)->FAME_OVERLOADED(init)(motion, mb_width, mb_height, flags);
#if DEBUG
  debug_log = fopen("pmvfast_debug.log", "wb");
#endif
#if STAT
  stat_log = fopen("pmvfast_stat.log", "wb");
  motionstat.num_picture = 0;
#endif

  /* double buffered motion vectors */
  FAME_MOTION_PMVFAST(motion)->vectors[0] = (fame_motion_vector_t *) fame_malloc(mb_width*2*mb_height*2*sizeof(fame_motion_vector_t));
  FAME_MOTION_PMVFAST(motion)->vectors[1] = (fame_motion_vector_t *) fame_malloc(mb_width*2*mb_height*2*sizeof(fame_motion_vector_t));
}

/*
 * pmvfast_close: calls the saved base close, then frees both vector buffers.
 * NOTE(review): debug_log is closed here but stat_log (opened in
 * pmvfast_init under STAT) is never closed in the visible code.
 */
static void pmvfast_close(fame_motion_t *motion)
{
  FAME_MOTION_PMVFAST(motion)->FAME_OVERLOADED(close)(motion);

#if DEBUG
  fclose(debug_log);
#endif

  fame_free(FAME_MOTION_PMVFAST(motion)->vectors[0]);
  fame_free(FAME_MOTION_PMVFAST(motion)->vectors[1]);
}

/*
 * pmvfast_enter: start of a picture.  Forwards to the saved base enter and,
 * under STAT, clears the per-picture counters (num_picture is kept).
 */
static void pmvfast_enter(fame_motion_t *motion,
                          fame_yuv_t **ref,
                          fame_yuv_t *current,
                          unsigned char *shape,
                          int search_range)
{
  FAME_MOTION_PMVFAST(motion)->FAME_OVERLOADED(enter)(motion, ref, current, shape, search_range);

#if DEBUG
  fprintf(debug_log, "********** NEW PICTURE **********\n");
#endif

#if STAT
  motionstat.num_eval = 0;
  motionstat.num_intra = 0;
  motionstat.num_median = 0;
  motionstat.num_early = 0;
  motionstat.num_16x16 = 0;
  motionstat.num_8x8 = 0;
  motionstat.num_largediamond = 0;
  motionstat.num_smalldiamond = 0;
#endif
}

/*
 * pmvfast_leave: end of a picture.  Forwards to the saved base leave,
 * optionally dumps statistics, then exchanges vectors[0] and vectors[1].
 */
static void pmvfast_leave(fame_motion_t *motion)
{
  fame_motion_vector_t *tmp;

  FAME_MOTION_PMVFAST(motion)->FAME_OVERLOADED(leave)(motion);

#if STAT
  fprintf(stat_log, "\n********** PICTURE %d **********\n", motionstat.num_picture);
  fprintf(stat_log, "Number of SAD8x8 : %d\n", motionstat.num_eval);
  fprintf(stat_log, "Use of median vector: %d times\n", motionstat.num_median);
  fprintf(stat_log, "Early exit : %d times\n", motionstat.num_early);
  fprintf(stat_log, "Large diamond : %d times\n", motionstat.num_largediamond);
  fprintf(stat_log, "Small diamond : %d times\n", motionstat.num_smalldiamond);
  motionstat.num_picture++;
#endif

  /* swap motion buffers */
  tmp = FAME_MOTION_PMVFAST(motion)->vectors[1];
  FAME_MOTION_PMVFAST(motion)->vectors[1] = FAME_MOTION_PMVFAST(motion)->vectors[0];
  FAME_MOTION_PMVFAST(motion)->vectors[0] = tmp;
}

/*
 * get_error: fills vectors[i].error for i in [0, number) by evaluating
 * eval_error() for block i displaced by vectors[i].
 *
 * The low bit of dx and of dy select one of ref[0..3]; dx>>1 / dy>>1 give
 * the displacement in samples.  Rows of the reference plane are addressed
 * with a stride of pitch+32, rows of the current plane and shape with
 * pitch (presumably the reference carries a 16-sample border on each side
 * -- TODO confirm).
 */
static inline void get_error(fame_yuv_t **ref,
                             unsigned char *current,
                             unsigned char *shape,
                             fame_motion_vector_t *vectors,
                             int offset[4],
                             int edged_offset[4],
                             int pitch,
                             compute_error_t eval_error,
                             int number)
{
  int i;
  int residual, motion;

  for(i = 0; i < number; i++) {
    residual = (vectors[i].dx & 1) + ((vectors[i].dy & 1) << 1);
    motion = (vectors[i].dx >> 1) + (vectors[i].dy >> 1) * (pitch+32);
    vectors[i].error = eval_error(ref[residual]->y+motion+edged_offset[i],
                                  current+offset[i],
                                  shape+offset[i],
                                  pitch);
  }
#if STAT
  motionstat.num_eval+=number;
#endif
}

/*
 * check_vector: tests the candidate pvector[0] against the current best
 * stored in vectors[0..3].
 *
 * The candidate is evaluated only if the displaced macroblock stays inside
 * the picture (extended by 16 samples on every side when "unrestricted" is
 * 1).  It replaces the current best when the sum of its four block errors
 * plus mv_overhead() is strictly smaller; in that case all four pvector
 * entries are copied into vectors and the error fields are overwritten
 * with the freshly computed values.
 *
 * Returns 0 when the candidate was adopted, 1 otherwise.
 */
static inline int check_vector(fame_yuv_t **ref,
                               unsigned char *current,
                               unsigned char *shape,
                               int x,
                               int y,
                               int width,
                               int height,
                               fame_motion_vector_t *pvector,
                               fame_motion_vector_t *vectors,
                               int offset[4],
                               int edged_offset[4],
                               int pitch,
                               compute_error_t eval_error,
                               fame_motion_vector_t *pmv,
                               int fcode,
                               int quant,
                               int unrestricted)
{
  int edge;
  const int dx = pvector->dx;
  const int dy = pvector->dy;

  edge = unrestricted << 4; /* unrestricted ? 16 : 0 */

  /* positions are compared in the same doubled units as dx/dy */
  if(((x+edge)<<1)+dx >= 0 &&
     ((y+edge)<<1)+dy >= 0 &&
     ((x-edge)<<1)+dx < ((width-16)<<1) &&
     ((y-edge)<<1)+dy < ((height-16)<<1)) {
    int i;
    int residual, motion;
    int errors[4];
    unsigned char *r;

    residual = (dx & 1) + ((dy & 1) << 1);
    motion = (dx >> 1) + (dy >> 1) * (pitch + 32);
    r = ref[residual]->y;

    for(i = 0; i < 4; i++) {
      errors[i] = eval_error(r+motion+edged_offset[i],
                             current+offset[i],
                             shape+offset[i],
                             pitch);
#if STAT
      motionstat.num_eval++;
#endif
    }

    if(errors[0]+errors[1]+errors[2]+errors[3]+
       mv_overhead(pmv, pvector[0].dx, pvector[0].dy, fcode, quant) <
       vectors[0].error+vectors[1].error+vectors[2].error+vectors[3].error+
       mv_overhead(pmv, vectors[0].dx, vectors[0].dy,fcode, quant)) {
      /* use checked vector */
      memcpy(vectors, pvector, 4*sizeof(fame_motion_vector_t));
      vectors[0].error = errors[0];
      vectors[1].error = errors[1];
      vectors[2].error = errors[2];
      vectors[3].error = errors[3];
      return(0);
    }
  }
  return(1);
}

/*
 * check_zero_vector: like check_vector() for the (0,0) vector, always
 * using ref[0] and without a bounds test (x, y, width, height are unused).
 *
 * When the total (errors + overhead) is below quant*96 the total is
 * reduced by 128 and each block error by 32 before the comparison, which
 * biases the decision towards the zero vector; the reduced block errors
 * are what gets stored if the zero vector is adopted.
 *
 * Returns 0 when the zero vector was adopted, 1 otherwise.
 */
static inline int check_zero_vector(fame_yuv_t **ref,
                                    unsigned char *current,
                                    unsigned char *shape,
                                    int x,
                                    int y,
                                    int width,
                                    int height,
                                    fame_motion_vector_t *vectors,
                                    int offset[4],
                                    int edged_offset[4],
                                    int pitch,
                                    compute_error_t eval_error,
                                    fame_motion_vector_t *pmv,
                                    int fcode,
                                    int quant)
{
  int i;
  int errors[4];
  int total_error;

  for(i = 0; i < 4; i++) {
    errors[i] = eval_error(ref[0]->y+edged_offset[i],
                           current+offset[i],
                           shape+offset[i],
                           pitch);
#if STAT
    motionstat.num_eval++;
#endif
  }

  total_error = errors[0]+errors[1]+errors[2]+errors[3]+
    mv_overhead(pmv, 0, 0, fcode, quant);

  if(total_error < quant * 96) {
    /* favor the 0 vector & small error to favor skip mode */
    total_error -= 128;
    errors[0] -= 32;
    errors[1] -= 32;
    errors[2] -= 32;
    errors[3] -= 32;
  }

  if(total_error <
     vectors[0].error+vectors[1].error+vectors[2].error+vectors[3].error+
     mv_overhead(pmv, vectors[0].dx, vectors[0].dy, fcode, quant)) {
    /* use checked vector */
    vectors[0].dx = vectors[0].dy = 0;
    vectors[1].dx = vectors[1].dy = 0;
    vectors[2].dx = vectors[2].dy = 0;
    vectors[3].dx = vectors[3].dy = 0;
    vectors[0].error = errors[0];
    vectors[1].error = errors[1];
    vectors[2].error = errors[2];
    vectors[3].error = errors[3];
    return(0);
  }
  return(1);
}

/*
 * find_macroblockvector: table-driven refinement of one vector shared by
 * the four 8x8 blocks of a macroblock.
 *
 *   table   search pattern (see "motion tables"); the search starts from
 *           table[1] and continues from table[index_direction of the best
 *           step]
 *   step    left shift applied to every table displacement
 *   count   maximum number of moves; decremented after each move
 *   mv      in/out: mv[0..3] hold the starting vector and block errors and
 *           receive the refined vector (copied to all four entries) and
 *           the four block errors of the best candidate
 *
 * Candidates outside [min_dx,max_dx] x [min_dy,max_dy] get INFINITE_ERROR.
 * The routine returns when no candidate improves on the current total or
 * when count reaches zero.
 */
static void find_macroblockvector(fame_yuv_t **ref,
                                  unsigned char *current,
                                  unsigned char *shape,
                                  int offset[4],
                                  int edged_offset[4],
                                  int x,
                                  int y,
                                  int width,
                                  int height,
                                  int pitch,
                                  tab_direction_t *table,
                                  int search_range,
                                  int step,
                                  int count,
                                  compute_error_t eval_error,
                                  fame_motion_vector_t *mv,
                                  fame_motion_vector_t *pmv,
                                  int fcode,
                                  int quant,
                                  int unrestricted)
{
  int i;
  int last_motion;
  tab_direction_t *current_table;
  int test_dx, test_dy, test_total;
  int best_dx, best_dy, best_total;
  int test_error0, test_error1, test_error2, test_error3;
  int subpel;
  int min_dx, max_dx, min_dy, max_dy;
  int motion, residual;
  unsigned char *location;

  subpel = 1; /* default (half-pel) */

  last_motion = 1;
  current_table = &(table[last_motion]);

  best_total = mv[0].error + mv[1].error + mv[2].error + mv[3].error +
    mv_overhead(pmv, mv->dx, mv->dy, fcode, quant);

  if(unrestricted) {
    /* NOTE(review): source text is missing after "(x+16)<" on the next
       line.  The lost span presumably held the rest of the min/max window
       computation for both the unrestricted and restricted cases, the
       outer search loop with the best_dx/best_dy reset, and the start of
       "for(i = 0; i < current_table->nbre; ...".  Restore from upstream. */
    min_dx = -fame_min((x+16)<nbre; i++) {
      test_dx = mv->dx+(current_table->directions[i].dx << step);
      test_dy = mv->dy+(current_table->directions[i].dy << step);

      if (test_dx >= min_dx && test_dy >= min_dy &&
          test_dx <= max_dx && test_dy <= max_dy) {
        /* Find the SAD for the blocks (8x8) */
        motion = (test_dx >> subpel) + (test_dy >> subpel) * (pitch + 32);
        /* NOTE(review): source text is missing after "((1<" on the next
           line; by analogy with get_error() it presumably computed the
           sub-sample plane index and then "location = ref[residual]->y+motion"
           -- confirm against upstream. */
        residual = (test_dx & ((1<y+motion;

        test_error0 = eval_error(location+edged_offset[0], current+offset[0], shape+offset[0], pitch);
        test_error1 = eval_error(location+edged_offset[1], current+offset[1], shape+offset[1], pitch);
        test_error2 = eval_error(location+edged_offset[2], current+offset[2], shape+offset[2], pitch);
        test_error3 = eval_error(location+edged_offset[3], current+offset[3], shape+offset[3], pitch);
        test_total = test_error0 + test_error1 + test_error2 + test_error3 +
          mv_overhead(pmv, test_dx, test_dy, fcode, quant);
#if STAT
        motionstat.num_eval+=4;
#endif
      } else {
        /* candidate outside the search window: never selected */
        test_total = INFINITE_ERROR;
        test_error0 = test_error1 = test_error2 = test_error3 = INFINITE_ERROR;
      }

#if DEBUG
      fprintf(debug_log, "errorBBGDS=%u\n",test_total);
#endif

      /* Check whether the current SAD (for the macroblock) is lower than
         the SAD of the previous "best" macroblock */
      if(test_total < best_total) {
        last_motion = current_table->directions[i].index_direction;
        best_dx = test_dx - mv->dx;
        best_dy = test_dy - mv->dy;
        best_total = test_total;
        mv[0].error = test_error0;
        mv[1].error = test_error1;
        mv[2].error = test_error2;
        mv[3].error = test_error3;
      }
    }

    /* Updates the motion vector and the location in the window ("current") */
    if((best_dx | best_dy) != 0) {
      mv->dx += best_dx;
      mv->dy += best_dy;
      /* keep the four block vectors identical */
      mv[3].dx = mv[2].dx = mv[1].dx = mv->dx;
      mv[3].dy = mv[2].dy = mv[1].dy = mv->dy;
      if(--count)
        current_table = &(table[last_motion]);
      else
        return;
    } else
      return;
  }
}

/*
 * find_blockvector: same refinement as find_macroblockvector() but for a
 * single 8x8 block: offset/edged_offset are scalars and only *mv is read
 * and updated.  Note that mv->error receives the candidate's error
 * including the mv_overhead() term.
 */
static void find_blockvector(fame_yuv_t **ref,
                             unsigned char *current,
                             unsigned char *shape,
                             int offset,
                             int edged_offset,
                             int x,
                             int y,
                             int width,
                             int height,
                             int pitch,
                             tab_direction_t *table,
                             int search_range,
                             int step,
                             int count,
                             compute_error_t eval_error,
                             fame_motion_vector_t *mv,
                             fame_motion_vector_t *pmv,
                             int fcode,
                             int quant,
                             int unrestricted)
{
  int i;
  int last_motion;
  tab_direction_t *current_table;
  int test_dx, test_dy, test_error;
  int best_dx, best_dy, best_error;
  int min_dx, max_dx, min_dy, max_dy;
  int subpel;
  int motion, residual;

  subpel = 1; /* default (half-pel) */

  last_motion = 1;
  current_table = &(table[last_motion]);

  best_error = mv->error + mv_overhead(pmv, mv->dx, mv->dy, fcode, quant);

  if(unrestricted) {
    /* NOTE(review): source text is missing after "(x+16)<" on the next
       line (window computation, outer loop and start of the inner "for");
       same damage as in find_macroblockvector().  Restore from upstream. */
    min_dx = -fame_min((x+16)<nbre; i++) {
      test_dx = mv->dx+(current_table->directions[i].dx << step);
      test_dy = mv->dy+(current_table->directions[i].dy << step);

      if (test_dx >= min_dx && test_dy >= min_dy &&
          test_dx <= max_dx && test_dy <= max_dy) {
        /* Find the SAD for the block (8x8) */
        motion = (test_dx >> subpel) + (test_dy >> subpel) * (pitch + 32);
        /* NOTE(review): source text is missing after "((1<" on the next
           line; presumably the plane index computation followed by
           "test_error = eval_error(ref[residual]->y+motion+edged_offset, ..."
           -- confirm against upstream. */
        residual = (test_dx & ((1<y+motion+edged_offset, current+offset, shape+offset, pitch) +
          mv_overhead(pmv, test_dx, test_dy, fcode, quant);
#if STAT
        /* NOTE(review): only one eval_error() call is visible per
           candidate here, yet the counter is advanced by 4 -- confirm. */
        motionstat.num_eval+=4;
#endif
      } else
        test_error = INFINITE_ERROR;

#if DEBUG
      fprintf(debug_log, "error block_vector=%u\n",test_error);
#endif

      /* Check whether the current SAD (for the block) is lower than
         the SAD of the previous "best" candidate */
      if(test_error < best_error) {
        last_motion = current_table->directions[i].index_direction;
        best_dx = test_dx - mv->dx;
        best_dy = test_dy - mv->dy;
        best_error = test_error;
        mv->error = test_error;
      }
    }

    /* Updates the motion vector and the location in the window ("current") */
    if((best_dx | best_dy) != 0) {
      mv->dx += best_dx;
      mv->dy += best_dy;
      if(--count)
        current_table = &(table[last_motion]);
      else
        return;
    } else
      return;
  }
}

/*
 * pmvfast_estimation: motion estimation for macroblock (mb_x, mb_y).
 *
 *   motion   estimator state (picture planes, flags, search range, fcode)
 *   vectors  in/out array of 4 vectors, one per 8x8 block.  On entry
 *            vectors[0].dx/dy is used as the predicted vector (it is
 *            clamped and copied to pmv and to vectors[1..3] below); on
 *            exit it holds the chosen vectors, errors, counts and
 *            deviations.
 *   quant    quantiser, used to weight mv_overhead() and the zero-vector
 *            bias
 *
 * Returns motion_intra, motion_inter or motion_inter4v.  The vectors chosen
 * for this macroblock are also stored in the per-picture buffer (zeroed
 * when intra is chosen) so that neighbouring macroblocks can use them as
 * candidates.
 *
 * NOTE(review): several spans of this function are missing from this copy
 * (threshold clamping, the first "keep predicted vector" condition and all
 * of Step 5); see the notes in the body.
 * NOTE(review): pvector_topleft is computed but not used in the visible
 * code; pred_same is set to 0 and never changed in the visible code, so
 * every "!pred_same" test below is always true as far as can be seen here.
 */
static fame_motion_coding_t pmvfast_estimation(fame_motion_t *motion,
                                               int mb_x,
                                               int mb_y,
                                               fame_motion_vector_t *vectors,
                                               unsigned char quant)
{
  int k;
  int pitch;
  int x, y, width, height;
  int offset[4];
  int edged_offset[4];
  compute_error_t eval_error;
  fame_motion_vector_t pmv;
  fame_motion_vector_t *pvector;
  fame_motion_vector_t *plast;
  fame_motion_vector_t *pvector_left, *pvector_topleft;
  fame_motion_vector_t *pvector_top, *pvector_topright;
  unsigned char *shape;
  unsigned char *current;
  fame_yuv_t **ref;
  int use_median;
  int threshold0;
  int threshold1;
  int threshold2;
  int sad_inter4v, sad_inter, mad_inter, count;
  int range;
  int pred_same;
  int fcode;
  int is_left, is_top, is_topright;
  int diamond_count;
  int sad_last;
  int unrestricted;
  fame_motion_coding_t motion_coding;

  fcode = motion->fcode;

#if DEBUG
  fprintf(debug_log, "\n***** macroblock : mb_y=%u mb_x=%u *****\n", mb_y, mb_x);
#endif

  /* ***** Initialization ***** */
  eval_error = motion->MAE8x8;
  x = mb_x << 4;                          /* macroblock origin in samples */
  y = mb_y << 4;
  width = motion->mb_width << 4;          /* picture size in samples */
  height = motion->mb_height << 4;
  shape = motion->shape;
  current = motion->current->y;
  pitch = motion->current->p;
  ref = motion->ref;
  range = motion->search_range;
  unrestricted = (motion->flags & FAME_MOTION_UNRESTRICTED_SEARCH)?1:0;

  /* offsets of the four 8x8 blocks in the current plane (stride pitch) ... */
  offset[0] = y * pitch + x;
  offset[1] = y * pitch + x+8;
  offset[2] = (y+8) * pitch + x;
  offset[3] = (y+8) * pitch + x+8;
  /* ... and in the reference planes (stride pitch+32) */
  edged_offset[0] = y * (pitch+32) + x;
  edged_offset[1] = y * (pitch+32) + x+8;
  edged_offset[2] = (y+8) * (pitch+32) + x;
  edged_offset[3] = (y+8) * (pitch+32) + x+8;

  /* per-block count / deviation of the current picture, used later for
     threshold0 and for the intra/inter decision */
  if(motion->shape) {
    vectors[0].count = mad_withmask(current+offset[0], shape+offset[0], pitch, &vectors[0].deviation);
    vectors[1].count = mad_withmask(current+offset[1], shape+offset[1], pitch, &vectors[1].deviation);
    vectors[2].count = mad_withmask(current+offset[2], shape+offset[2], pitch, &vectors[2].deviation);
    vectors[3].count = mad_withmask(current+offset[3], shape+offset[3], pitch, &vectors[3].deviation);
  } else {
    vectors[0].count = mad_withoutmask(current+offset[0], pitch, &vectors[0].deviation);
    vectors[1].count = mad_withoutmask(current+offset[1], pitch, &vectors[1].deviation);
    vectors[2].count = mad_withoutmask(current+offset[2], pitch, &vectors[2].deviation);
    vectors[3].count = mad_withoutmask(current+offset[3], pitch, &vectors[3].deviation);
  }

  /* integer sample search */
  /* Step1 : vectors around the current macroblock */
  /* NOTE(review): pvector and plast are computed from the identical
     expression (both index vectors[0]), so they alias the same entry.
     Given the double buffering set up in pmvfast_init()/pmvfast_leave(),
     one of them was possibly meant to use vectors[1] -- confirm. */
  pvector = FAME_MOTION_PMVFAST(motion)->vectors[0] + (mb_y*motion->mb_width + mb_x)*4;
  plast = FAME_MOTION_PMVFAST(motion)->vectors[0] + (mb_y*motion->mb_width + mb_x)*4;
  pvector_left = pvector - 4;
  pvector_topleft = pvector - 4*motion->mb_width - 4;
  pvector_top = pvector - 4*motion->mb_width;
  pvector_topright = pvector - 4*motion->mb_width + 4;

  /* which neighbours exist inside the picture */
  is_left = mb_x > 0;
  is_top = mb_y > 0;
  is_topright = is_top & (mb_x < motion->mb_width - 1);

  pred_same = 0;
  diamond_count = 2*range;

  if(is_left && is_topright /* && is_top */) {
    /* all vectors are valid */
    if(pvector_left->dx == pvector_top->dx &&
       pvector_left->dy == pvector_top->dy &&
       pvector_left->dx == pvector_topright->dx &&
       pvector_left->dy == pvector_topright->dy)
      /* all vectors are equal : do only one diamond search step */
      diamond_count = 1;
  }

  /* Compute the weighted mean vector : */
  /* dx = (f(e1)*dx1 + f(e2)*dx2 + f(e3)*dx3) / (dx1 + dx2 + dx3) */
  /* (disabled code, kept for reference)
     weight_left = 65536 - (unsigned int)(macroblock_vector_left->error);
     weight_left = 65536 - (unsigned int)(macroblock_vector_top->error);
     weight_topright = 65536 - (unsigned int)(macroblock_vector_topright->error);
     macroblock_vector_barycentre.dx =
       (weight_left * macroblock_vector_left->dx +
        weight_top * macroblock_vector_top->dx +
        weight_topright * macroblock_vector_topright->dx ) /
       (weight_left + weight_top + weight_topright);
     macroblock_vector_barycentre.dy =
       (weight_left * macroblock_vector_left->dy +
        weight_top * macroblock_vector_top->dy +
        weight_topright * macroblock_vector_topright->dy ) /
       (weight_left + weight_top + weight_topright);
  */

  /* saturate prediction to borders */
  if(unrestricted) {
    /* picture extended by 16 samples on the top/left side */
    if((x<<1)+vectors[0].dx<(-16<<1)) vectors[0].dx = (-16-x)<<1;
    if((y<<1)+vectors[0].dy<(-16<<1)) vectors[0].dy = (-16-y)<<1;
    if((x<<1)+vectors[0].dx>(width<<1)) vectors[0].dx = (width-x)<<1;
    if((y<<1)+vectors[0].dy>(height<<1)) vectors[0].dy = (height-y)<<1;
  } else {
    if((x<<1)+vectors[0].dx<0) vectors[0].dx = (-x)<<1;
    if((y<<1)+vectors[0].dy<0) vectors[0].dy = (-y)<<1;
    if((x<<1)+vectors[0].dx>((width-16)<<1)) vectors[0].dx = (width-16-x)<<1;
    if((y<<1)+vectors[0].dy>((height-16)<<1)) vectors[0].dy = (height-16-y)<<1;
  }

  /* Step2 : Calculate the thresholds */
  /* threshold1 = smallest total error among the available neighbours */
  threshold1 = INFINITE_ERROR;
  if(mb_x>0)
    threshold1 = fame_min(threshold1, pvector_left[0].error+pvector_left[1].error+pvector_left[2].error+pvector_left[3].error);
  if(mb_y>0)
    threshold1 = fame_min(threshold1, pvector_top[0].error+pvector_top[1].error+pvector_top[2].error+pvector_top[3].error);
  /* NOTE(review): the condition on the next line is damaged
     ("mb_xmb_width-1"); presumably the same top-right test as
     is_topright above -- confirm against upstream. */
  if(mb_y>0 && mb_xmb_width-1)
    threshold1 = fame_min(threshold1, pvector_topright[0].error+pvector_topright[1].error+pvector_topright[2].error+pvector_topright[3].error);

  threshold0 = vectors[0].count + vectors[1].count + vectors[2].count + vectors[3].count;
  threshold2 = threshold1 + threshold0;

  /* NOTE(review): the condition on the next line is damaged
     ("threshold1UPPER_LIMIT_THRESHOLD1"); presumably a lower clamp with
     LOWER_LIMIT_THRESHOLD1 preceded the upper clamp -- confirm. */
  if(threshold1UPPER_LIMIT_THRESHOLD1) threshold1 = UPPER_LIMIT_THRESHOLD1;
  if(threshold2>UPPER_LIMIT_THRESHOLD2) threshold2 = UPPER_LIMIT_THRESHOLD2;

#if DEBUG
  fprintf(debug_log, "threshold0 = %u\n", threshold0);
  fprintf(debug_log, "threshold1 = %u\n", threshold1);
  fprintf(debug_log, "threshold2 = %u\n", threshold2);
#endif

  /* Step3 : Process a set of vectors whose matching probability is very high*/
  /* i.e. median, zero, prev, left, top, topright vector */

  /* Check the median vector */
  /* the (clamped) predicted vector becomes pmv and the start vector of all
     four blocks */
  pmv.dx = vectors[3].dx = vectors[2].dx = vectors[1].dx = vectors[0].dx;
  pmv.dy = vectors[3].dy = vectors[2].dy = vectors[1].dy = vectors[0].dy;

  get_error(ref, current, shape, vectors, offset, edged_offset, pitch, eval_error, 4);
  sad_inter = vectors[0].error+vectors[1].error+
    vectors[2].error+vectors[3].error;
  sad_last = plast[0].error+plast[1].error+
    plast[2].error+plast[3].error+
    mv_overhead(&pmv, plast[0].dx, plast[0].dy, fcode, quant);

  /* NOTE(review): the condition on the next line is damaged
     ("sad_interdx"); presumably a threshold test on sad_inter followed by
     "|| (vectors->dx == plast->dx && ...", mirroring the Step 4 test
     below -- confirm against upstream. */
  if((sad_interdx == plast->dx &&
      vectors->dy == plast->dy &&
      sad_inter < sad_last)) {
    /* keep predicted vector */
#if STAT
    motionstat.num_median++;
#endif
    memcpy(pvector, vectors, 4*sizeof(fame_motion_vector_t));
    /* non-zero iff any of the four block vectors differs from vectors[0] */
    if((vectors[0].dx ^ vectors[1].dx) +
       (vectors[0].dx ^ vectors[2].dx) +
       (vectors[0].dx ^ vectors[3].dx) +
       (vectors[0].dy ^ vectors[1].dy) +
       (vectors[0].dy ^ vectors[2].dy) +
       (vectors[0].dy ^ vectors[3].dy))
      return(motion_inter4v);
    else
      return(motion_inter);
  }

#if DEBUG
  fprintf(debug_log, "Median vector : dx=%d dy=%d error=%d\n", vectors[0].dx, vectors[0].dy, vectors[0].error+vectors[1].error+vectors[2].error+vectors[3].error);
#endif

  /* TODO: check performance of current 4MV prediction compared to 1MV */

  /* Check the zero vector */
  use_median = check_zero_vector(ref, current, shape,
                                 x, y, width, height,
                                 vectors,
                                 offset, edged_offset, pitch,
                                 eval_error, &pmv, fcode, quant);

  /* Check the previous vector */
  /* fcode may have changed, make sure the vector is within the search range */
  /* NOTE(review): this assignment uses "=" and so discards the result of
     check_zero_vector(), whereas the following checks use "&=".
     use_median is not read anywhere in the visible code. */
  if(plast->dx >= -(range<<1) && plast->dx <= (range<<1)-1 &&
     plast->dy >= -(range<<1) && plast->dy <= (range<<1)-1)
    use_median = check_vector(ref, current, shape,
                              x, y, width, height,
                              plast, vectors,
                              offset, edged_offset, pitch,
                              eval_error, &pmv, fcode, quant, unrestricted);

  /* Check the left vector */
  if(!pred_same && is_left) {
    use_median &= check_vector(ref, current, shape,
                               x, y, width, height,
                               pvector_left, vectors,
                               offset, edged_offset, pitch,
                               eval_error, &pmv, fcode, quant, unrestricted);
  }

  /* Check the top vector */
  if(!pred_same && is_top) {
    use_median &= check_vector(ref, current, shape,
                               x, y, width, height,
                               pvector_top, vectors,
                               offset, edged_offset, pitch,
                               eval_error, &pmv, fcode, quant, unrestricted);
  }

  /* Check the topright vector */
  if(!pred_same && is_topright) {
    use_median &= check_vector(ref, current, shape,
                               x, y, width, height,
                               pvector_topright, vectors,
                               offset, edged_offset, pitch,
                               eval_error, &pmv, fcode, quant, unrestricted);
  }

#if DEBUG
  fprintf(debug_log, "Best vector of the set : dx=%d dy=%d error=%u\n", vectors[0].dx, vectors[0].dy, vectors[0].error+vectors[1].error+vectors[2].error+vectors[3].error);
#endif

  /* Step4 : check early exit */
  sad_inter = vectors[0].error+vectors[1].error+
    vectors[2].error+vectors[3].error+
    mv_overhead(&pmv, vectors[0].dx, vectors[0].dy, fcode, quant);

  if(sad_inter < threshold1 ||
     (vectors->dx == plast->dx &&
      vectors->dy == plast->dy &&
      sad_inter < sad_last)) {
#if DEBUG
    fprintf(debug_log, "Early exit\n");
#endif
    memcpy(pvector, vectors, 4*sizeof(fame_motion_vector_t));
#if STAT
    motionstat.num_early++;
#endif
    /* non-zero iff any of the four block vectors differs from vectors[0] */
    if((vectors[0].dx ^ vectors[1].dx) +
       (vectors[0].dx ^ vectors[2].dx) +
       (vectors[0].dx ^ vectors[3].dx) +
       (vectors[0].dy ^ vectors[1].dy) +
       (vectors[0].dy ^ vectors[2].dy) +
       (vectors[0].dy ^ vectors[3].dy))
      return(motion_inter4v);
    else
      return(motion_inter);
  }

  /* integer sample search */
  /* Step5 : The previous attempts were not successful -> apply the
     diamond search algorithm with the initial vector equal to the best
     previous vector found */
  /* NOTE(review): a large span is missing after "threshold2" on the next
     line.  It presumably contained the choice between the small and large
     diamond tables, the integer find_macroblockvector() call(s), the
     computation of sad_inter, mad_inter and count used by the intra
     decision below, and the "#if DEBUG" that matches the "#endif" which
     follows.  Restore from upstream; do not guess. */
  if(pred_same || pmv.dx != 0 || pmv.dy != 0 || threshold2dx, vectors->dy, sad_inter);
#endif

  /* store vectors for future prediction */
  memcpy(pvector, vectors, 4*sizeof(fame_motion_vector_t));

  /* intra/inter mode decision */
  /* -> 1 - COMPUTE THE VARIANCE OF THE MACROBLOCK                */
  /*      (estimated by absolute difference and not square diff.) */
  /*    The number of bits (at a given quality) needed by the DCT */
  /*    depends on the variance (in a first approximation)        */
  /* -> 2 - COMPARE WITH THE COVARIANCE GIVEN BY THE MOTION VECTOR */
  /*    The number of bits to code residual macroblock            */
  /*    depends on the covariance (in a first approximation)      */
  /* -> If (1) < (2) - 2*N : Choose INTRA                         */
  /*    Subtract 2*N to favour INTER mode when there is no        */
  /*    significant difference                                    */
  /* TODO: maybe move half pel before decision */
  if(mad_inter + count + count < sad_inter) {
#if DEBUG
    fprintf(debug_log, "Coding = intra\n");
#endif
    /* intra: leave a zeroed entry in the prediction buffer */
    memset(pvector, 0, 4*sizeof(fame_motion_vector_t));
    return(motion_intra);
  }

  /* subvector (8x8) search */
  if(motion->flags & FAME_MOTION_BLOCK_SEARCH) {
    for(k = 0; k < 4; k++) {
      /* TODO: k depends on shape */
      /* integer sample search */
      find_blockvector(ref, current, shape,
                       offset[k], edged_offset[k],
                       x, y, width, height, pitch,
                       td_block_bbgds, range, 1, diamond_count,
                       eval_error, &vectors[k], &pmv, fcode, quant, unrestricted);
      /* half sample search */
      find_blockvector(ref, current, shape,
                       offset[k], edged_offset[k],
                       x, y, width, height, pitch,
                       td_block_bbgds, range, 0, 1,
                       eval_error, &vectors[k], &pmv, fcode, quant, unrestricted);
    }
#if DEBUG
    /* NOTE(review): "i" is not declared in this function (only "k" is),
       so this section would not compile with DEBUG enabled. */
    for(i=0; i<4; i++)
      fprintf(debug_log, "Best 8x8 vector found (integer pixel) for the block %d: dx=%d dy=%d error=%u\n", i, vectors[i].dx, vectors[i].dy, vectors[i].error);
#endif
  }

  /* half sample search */
  /* refines the 16x16 vector saved in pvector; vectors[] keeps the 8x8
     results */
  find_macroblockvector(ref, current, shape,
                        offset, edged_offset,
                        x, y, width, height, pitch,
                        td_block_bbgds, range, 0, 1,
                        eval_error, pvector, &pmv, fcode, quant, unrestricted);

#if DEBUG
  fprintf(debug_log, "After half pixel search on the macroblock : dx=%d dy=%d error=%u\n", pvector->dx, pvector->dy, pvector[0].error+pvector[1].error+pvector[2].error+pvector[3].error);
#endif

  /* inter4v / inter decision */
  sad_inter = pvector[0].error + pvector[1].error +
    pvector[2].error + pvector[3].error+mv_overhead(&pmv, pvector[0].dx, pvector[0].dy, fcode, quant);
  mad_inter = vectors[0].deviation + vectors[1].deviation +
    vectors[2].deviation + vectors[3].deviation;
  count = vectors[0].count + vectors[1].count +
    vectors[2].count + vectors[3].count;
  /* four vectors: the overhead is charged once per block vector */
  sad_inter4v = vectors[0].error + vectors[1].error +
    vectors[2].error + vectors[3].error +
    mv_overhead(&pmv, vectors[0].dx, vectors[0].dy, fcode, quant) +
    mv_overhead(&pmv, vectors[1].dx, vectors[1].dy, fcode, quant) +
    mv_overhead(&pmv, vectors[2].dx, vectors[2].dy, fcode, quant) +
    mv_overhead(&pmv, vectors[3].dx, vectors[3].dy, fcode, quant);

#if DEBUG
  fprintf(debug_log, "Best 16x16 vector found : dx=%d dy=%d error=%u\n", pvector->dx, pvector->dy, pvector[0].error+pvector[1].error+pvector[2].error+pvector[3].error);
  /* NOTE(review): "i" is not declared in this function (see above). */
  for(i=0; i<4; i++) {
    fprintf(debug_log, "Best 8x8 vector found for the block %d : dx=%d dy=%d error=%u\n", i, vectors[i].dx, vectors[i].dy, vectors[i].error);
  }
#endif

  /* inter4v/inter mode decision */
  /* four vectors must win by more than (count>>1)+1 to be selected */
  if((motion->flags & FAME_MOTION_BLOCK_SEARCH) &&
     (sad_inter4v + ((count>>1)+1) < sad_inter)) {
#if DEBUG
    fprintf(debug_log, "4 vectors\n");
#endif
    /* inter4v prediction */
    sad_inter = sad_inter4v;
    motion_coding = motion_inter4v;
  } else {
    /* single vector: return the refined 16x16 result to the caller */
    memcpy(vectors, pvector, 4*sizeof(fame_motion_vector_t));
    motion_coding = motion_inter;
  }

#if DEBUG
  fprintf(debug_log, "Coding = inter\n");
#endif
  return(motion_coding);
}

/* End of motion_pmvfast.c */