/* libfame - Fast Assembly MPEG Encoder Library Copyright (C) 2000-2001 Vivien Chappelier This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /**************************** mpeg encoder ***********************************/ #include #include #include #include #include "fame.h" #include "fame_encoder.h" #include "fame_encoder_mpeg.h" #include "table_scale.h" #if defined(HAS_MMX) #define arch_enter_state() #define arch_leave_state() asm("emms") #include "transpose_mmx.h" #include "dct_mmx.h" #include "quantize_mmx.h" #include "fetch_mmx.h" #else #define arch_enter_state() #define arch_leave_state() #include "dct_float.h" #include "quantize_float.h" #include "fetch_float.h" #endif #if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ <= 95 && __GNUC_PATCHLEVEL__ <= 3) /* gcc bug?? 
workaround */ extern void __fame_dummy_call(int q); #endif static void mpeg_init(fame_encoder_t *encoder, int width, int height, unsigned char *intra_quantisation_table, unsigned char *inter_quantisation_table, unsigned char *intra_dc_y_scale_table, unsigned char *intra_dc_c_scale_table, fame_mismatch_t mismatch_type); static void mpeg_enter(fame_encoder_t *encoder, fame_yuv_t **past_ref, fame_yuv_t **new_ref, fame_yuv_t **future_ref, fame_yuv_t *yuv, unsigned char *shape); static void mpeg_encode_intra_mb(fame_encoder_t *encoder, short x, short y, short *blocks[6], unsigned char q, fame_bab_t bab_type); static void mpeg_encode_inter_mb(fame_encoder_t *encoder, short x, short y, short *blocks[6], fame_motion_vector_t *forward, fame_motion_vector_t *backward, fame_motion_coding_t motion_coding, unsigned char q, fame_bab_t bab_type); static void mpeg_leave(fame_encoder_t *encoder); static void mpeg_close(fame_encoder_t *encoder); FAME_CONSTRUCTOR(fame_encoder_mpeg_t) { FAME_OBJECT(this)->name = "MPEG encoder"; FAME_ENCODER(this)->init = mpeg_init; FAME_ENCODER(this)->enter = mpeg_enter; FAME_ENCODER(this)->encode_intra_mb = mpeg_encode_intra_mb; FAME_ENCODER(this)->encode_inter_mb = mpeg_encode_inter_mb; FAME_ENCODER(this)->leave = mpeg_leave; FAME_ENCODER(this)->close = mpeg_close; return(this); } /* mpeg_init */ /* */ /* Description: */ /* Initialize the encoder. */ /* */ /* Arguments: */ /* fame_encoder_t *encoder: the encoder to initialize */ /* int width: width of the frame */ /* int height: height of the frame */ /* unsigned char *intra_quantisation_table: quantisation matrix for intra */ /* unsigned char *inter_quantisation_table: quantisation matrix for inter */ /* unsigned char *intra_dc_y_scale_table: quantisation table for DC of Y */ /* unsigned char *intra_dc_c_scale_table: quantisation table for DC of C */ /* fame_mismatch_t mismatch_type: type of mismatch control */ /* */ /* Return value: */ /* None. 
*/ static void mpeg_init(fame_encoder_t *encoder, int width, int height, unsigned char *iqtable, unsigned char *niqtable, unsigned char *intra_dc_y_scale_table, unsigned char *intra_dc_c_scale_table, fame_mismatch_t mismatch_type) { fame_encoder_mpeg_t *encoder_mpeg = FAME_ENCODER_MPEG(encoder); int i, q; #ifdef HAS_MMX asm("emms"); #endif /* set width and height */ encoder_mpeg->width = width; encoder_mpeg->height = height; /* allocate padded shape buffer */ encoder_mpeg->padded = (unsigned char *) malloc(encoder_mpeg->width* encoder_mpeg->height); encoder_mpeg->mismatch = mismatch_type; /* compute quantization matrixes */ for(q = 1; q < 32; q++) { /* compute the intra quantisation and dequantisation DC scaler */ #ifdef HAS_MMX encoder_mpeg->yiqmatrixes[q][0] = (dct_t) ((double)(1UL<<16)*postscale[0]/intra_dc_y_scale_table[q]); encoder_mpeg->ciqmatrixes[q][0] = (dct_t) ((double)(1UL<<16)*postscale[0]/intra_dc_c_scale_table[q]); encoder_mpeg->yiqround[q][0] = (dct_t) ((double)intra_dc_y_scale_table[q]/(2*postscale[0])+0.5); encoder_mpeg->ciqround[q][0] = (dct_t) ((double)intra_dc_c_scale_table[q]/(2*postscale[0])+0.5); #else encoder_mpeg->yiqmatrixes[q][0] = postscale[0] / intra_dc_y_scale_table[q]; encoder_mpeg->ciqmatrixes[q][0] = postscale[0] / intra_dc_c_scale_table[q]; encoder_mpeg->yiqround[q][0] = ((dct_t) intra_dc_y_scale_table[q])/(2*postscale[0]); encoder_mpeg->ciqround[q][0] = ((dct_t) intra_dc_c_scale_table[q])/(2*postscale[0]); #endif /* compute the intra quantisation and dequantisation matrix */ for(i = 1; i < 64; i++) { #ifdef HAS_MMX #if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ <= 95 && __GNUC_PATCHLEVEL__ <= 3) //#error Your GCC is too old, and may produce bad code for libfame. /* gcc bug here?? try to comment/uncomment the following line*/ /* or was I wrong in some earlier asm directive??! 
*/ /* force unoptimized access to q */ __fame_dummy_call(q); #endif encoder_mpeg->yiqmatrixes[q][i] = encoder_mpeg->ciqmatrixes[q][i] = (dct_t) ((double)(1UL<<19)*postscale[i] / (q*iqtable[i])); encoder_mpeg->yiqround[q][i] = encoder_mpeg->ciqround[q][i] = (dct_t) ((double)((1+(6*q+3)/4) * iqtable[i]) / (4 * 8.0 * postscale[i]) + 0.5); #else encoder_mpeg->yiqmatrixes[q][i] = encoder_mpeg->ciqmatrixes[q][i] = 8.0 * postscale[i] / (q * iqtable[i]); /* mpeg-4 rounding gives better rate-distortion results */ /* than mpeg-1 except maybe for q = 1 (need more tests) */ encoder_mpeg->yiqround[q][i] = encoder_mpeg->ciqround[q][i] = ((dct_t) (1+(6*q+3)/4) * iqtable[i]) / (4 * 8.0 * postscale[i]); #endif } /* compute the inter quantisation and dequantisation matrix */ for(i = 0; i < 64; i++) { #ifdef HAS_MMX encoder_mpeg->niqmatrixes[q][i] = (dct_t) ((double)(1UL<<19)*postscale[i]/(q*niqtable[i])); encoder_mpeg->niqround[q][i] = (dct_t) ((double)niqtable[i] / (4 * 8.0 * postscale[i]) + 0.5); #else encoder_mpeg->niqmatrixes[q][i] = 8.0 * postscale[i] / (q * niqtable[i]); encoder_mpeg->niqround[q][i] = (dct_t) niqtable[i] / (4 * 8.0 * postscale[i]); #endif } } } /* mpeg_enter */ /* */ /* Description: */ /* Start encoding a new picture. */ /* */ /* Arguments: */ /* fame_encoder_t *encoder: the encoder */ /* fame_yuv_t **past_ref: past reference images */ /* fame_yuv_t **new_ref: new reconstructed reference images */ /* fame_yuv_t **future_ref: future reference images */ /* fame_yuv_t *yuv: source image */ /* unsigned char *shape: shape binary mask */ /* */ /* Return value: */ /* None. 
*/
static void mpeg_enter(fame_encoder_t *encoder,
                       fame_yuv_t **past_ref,
                       fame_yuv_t **new_ref,
                       fame_yuv_t **future_ref,
                       fame_yuv_t *yuv,
                       unsigned char *shape)
{
  fame_encoder_mpeg_t *mpeg = FAME_ENCODER_MPEG(encoder);

  /* Only the pointers are recorded; nothing is copied. */

  /* picture to encode and its shape mask */
  mpeg->input = yuv;
  mpeg->shape = shape;

  /* reference picture sets */
  mpeg->past_ref = past_ref;
  mpeg->future_ref = future_ref;
  mpeg->new_ref = new_ref;

  arch_enter_state();
}

/*
 * mpeg_encode_intra_mb
 *
 * Description:
 *   Encode an intra macroblock.
 *
 * Arguments:
 *   fame_encoder_t *encoder: the encoder
 *   short x: the x location of the macroblock in macroblock units
 *   short y: the y location of the macroblock in macroblock units
 *   short *blocks[6]: the DCT coded blocks
 *   unsigned char q: the quantizer scale for this block
 *   fame_bab_t bab_type: binary alpha block type
 *
 * Return value:
 *   None.
*/
static void mpeg_encode_intra_mb(fame_encoder_t *encoder,
                                 short x,
                                 short y,
                                 short *blocks[6],
                                 unsigned char q,
                                 fame_bab_t bab_type)
{
  fame_encoder_mpeg_t *encoder_mpeg = FAME_ENCODER_MPEG(encoder);
  unsigned long luma[4];   /* offsets of Y(0,0), Y(0,1), Y(1,0), Y(1,1) */
  unsigned long cb, cr;    /* offsets into the two chroma planes */
  int b, pitch;
  void (* fetch_luma)(unsigned char *input,
                      dct_t *output,
                      unsigned char *shape,
                      int pitch);
  void (* fetch_chroma)(unsigned char *input,
                        dct_t *output,
                        unsigned char *shape,
                        int pitch);
  void (* transform)(dct_t *block);
  void (* quantise)(short *block, dct_t *qblock, dct_t *matrix, dct_t *round);

  pitch = encoder_mpeg->input->p;

  /* Locate the six blocks of macroblock (x, y) inside the source planes. */
  luma[0] = (y << 4) * pitch + (x << 4);
  luma[1] = luma[0] + 8;
  luma[2] = luma[0] + (pitch << 3);
  luma[3] = luma[2] + 8;
  cb = (y << 3) * (pitch >> 1) + (x << 3);
  cr = (y << 3) * (pitch >> 1) + (x << 3);

  /* Hand the caller pointers to the encoder's own coefficient buffers. */
  for(b = 0; b < 6; b++)
    blocks[b] = encoder_mpeg->blocks[b];

  /* The masked fetch routines are used unless the block is fully coded. */
  if(bab_type == bab_all_coded) {
    fetch_luma = prefetch_withoutmask;
    fetch_chroma = prefetch_withoutmask;
  } else {
    fetch_luma = prefetch_Y_withmask;
    fetch_chroma = prefetch_C_withmask;
  }
  transform = dct;
  quantise = quantize;

  /* Luma: fetch, transform and quantise each of the four blocks in turn. */
  for(b = 0; b < 4; b++) {
    fetch_luma(encoder_mpeg->input->y + luma[b],
               encoder_mpeg->tmpblock,
               encoder_mpeg->shape + luma[b],
               pitch);
    transform(encoder_mpeg->tmpblock);
    quantise(encoder_mpeg->blocks[b],
             encoder_mpeg->tmpblock,
             encoder_mpeg->yiqmatrixes[q],
             encoder_mpeg->yiqround[q]);
  }

  /* U: chroma planes use half the luma pitch; the shape pointer is the */
  /* top left corner of the macroblock.                                 */
  fetch_chroma(encoder_mpeg->input->u + cb,
               encoder_mpeg->tmpblock,
               encoder_mpeg->shape + luma[0],
               pitch >> 1);
  transform(encoder_mpeg->tmpblock);
  quantise(encoder_mpeg->blocks[4],
           encoder_mpeg->tmpblock,
           encoder_mpeg->ciqmatrixes[q],
           encoder_mpeg->ciqround[q]);

  /* V */
  fetch_chroma(encoder_mpeg->input->v + cr,
               encoder_mpeg->tmpblock,
               encoder_mpeg->shape + luma[0],
               pitch >> 1);
  transform(encoder_mpeg->tmpblock);
  quantise(encoder_mpeg->blocks[5],
           encoder_mpeg->tmpblock,
           encoder_mpeg->ciqmatrixes[q],
           encoder_mpeg->ciqround[q]);
}

/*
 * mpeg_encode_inter_mb
 *
 * Description:
 *   Encode an inter macroblock.
 *
 * Arguments:
 *   fame_encoder_t *encoder: the encoder
 *   short x: the x location of the macroblock in macroblock units
 *   short y: the y location of the macroblock in macroblock units
 *   short *blocks[6]: the DCT coded blocks
 *   fame_motion_vector_t *forward: forward motion vectors
 *   fame_motion_vector_t *backward: backward motion vectors
 *   fame_motion_coding_t motion_coding: motion coding type
 *   unsigned char q: the quantizer scale for this block
 *   fame_bab_t bab_type: binary alpha block type
 *
 * Return value:
 *   None.
*/
/*
 * Implementation notes for mpeg_encode_inter_mb:
 *   - Every entry of blocks[] is first pointed at the encoder's own
 *     coefficient buffer; a luma entry is then reset to NULL when that
 *     block's forward[].error is below quant_scale*16 and the block is
 *     skipped. The two chroma blocks are always coded.
 *   - The prediction is read from future_ref[], indexed by the half-pel
 *     bits of the forward vector.
 *   - backward, motion_coding and bab_type are not read.
 */
static void mpeg_encode_inter_mb(fame_encoder_t *encoder,
                                 short x,
                                 short y,
                                 short *blocks[6],
                                 fame_motion_vector_t *forward,
                                 fame_motion_vector_t *backward,
                                 fame_motion_coding_t motion_coding,
                                 unsigned char q,
                                 fame_bab_t bab_type)
{
  fame_encoder_mpeg_t *encoder_mpeg = FAME_ENCODER_MPEG(encoder);
  unsigned long offset[6];  /* block offsets in the source planes */
  signed long motion[6];    /* block offsets in the reference planes */
  signed long residual[6];  /* half-pel bits, index into future_ref[] */
  int i, pitch;
  void (* diff_)(unsigned char *input,
                 unsigned char *ref,
                 dct_t *output,
                 int ipitch,
                 int rpitch);
  void (* dct_)(dct_t *block);
  void (* quantize_)(short *block, dct_t *qblock, dct_t *matrix, dct_t *round);

  /* Part of the callback signature, not used by this implementation. */
  (void) backward;
  (void) motion_coding;
  (void) bab_type;

  /* Make offsets to blocks */
  pitch = encoder_mpeg->input->p;
  offset[0] = (y << 4) * pitch + (x << 4);         /* Y(0,0) */
  offset[1] = offset[0] + 8;                       /* Y(0,1) */
  offset[2] = offset[0] + (pitch << 3);            /* Y(1,0) */
  offset[3] = offset[2] + 8;                       /* Y(1,1) */
  offset[4] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cb */
  offset[5] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cr */

  /* Compute motion offsets (motion is half-pixel coded) */
  /* half-pel motion: bit 1 = vertical half-pel, bit 0 = horizontal */
  for(i = 0; i < 6; i++)
    residual[i] = ((forward[i].dy & 1) << 1) | (forward[i].dx & 1);

  /* full-pel motion, luma: row/col give the block origin in the MB */
  for(i = 0; i < 4; i++) {
    int row = (i & 2) << 2;  /* 0 for Y(0,*), 8 for Y(1,*) */
    int col = (i & 1) << 3;  /* 0 for Y(*,0), 8 for Y(*,1) */

    pitch = encoder_mpeg->future_ref[residual[i]]->p;
    motion[i] = ((y<<4)+(forward[i].dy>>1)+row)*pitch+(forward[i].dx>>1)+(x<<4)+col;
  }

  /* full-pel motion, chroma: half the pitch of the reference */
  for(i = 4; i < 6; i++) {
    pitch = encoder_mpeg->future_ref[residual[i]]->p;
    motion[i] = ((y<<3)+(forward[i].dy>>1))*(pitch>>1)+(forward[i].dx>>1)+(x<<3);
  }

  /* Encode blocks */
  pitch = encoder_mpeg->input->p;

  for(i = 0; i < 6; i++)
    blocks[i] = encoder_mpeg->blocks[i];

  diff_ = diff;
  dct_ = dct;
  quantize_ = quantize;

  /* Y */
  /* NOTE(review): the skip threshold reads encoder_mpeg->quant_scale,   */
  /* which is not assigned anywhere in this file, rather than the q      */
  /* argument - confirm this is intended.                                */
  /* NOTE(review): the reference pitch passed to diff is pitch+32 while  */
  /* the offsets above use future_ref[]->p; presumably the two agree -   */
  /* confirm against the reference frame allocation.                     */
  for(i = 0; i < 4; i++) {
    if(forward[i].error < encoder_mpeg->quant_scale*16) {
      blocks[i] = NULL;
      continue;
    }
    diff_(encoder_mpeg->input->y + offset[i],
          encoder_mpeg->future_ref[residual[i]]->y + motion[i],
          encoder_mpeg->tmpblock,
          pitch,
          pitch+32);
    dct_(encoder_mpeg->tmpblock);
    quantize_(encoder_mpeg->blocks[i],
              encoder_mpeg->tmpblock,
              encoder_mpeg->niqmatrixes[q],
              encoder_mpeg->niqround[q]);
  }

  /* U */
  /* TODO: skip block with error < quant_scale*16 */
  diff_(encoder_mpeg->input->u + offset[4],
        encoder_mpeg->future_ref[residual[4]]->u + motion[4],
        encoder_mpeg->tmpblock,
        pitch >> 1,
        (pitch+32) >> 1);
  dct_(encoder_mpeg->tmpblock);
  quantize_(encoder_mpeg->blocks[4],
            encoder_mpeg->tmpblock,
            encoder_mpeg->niqmatrixes[q],
            encoder_mpeg->niqround[q]);

  /* V */
  /* TODO: skip block with error < quant_scale*16 */
  diff_(encoder_mpeg->input->v + offset[5],
        encoder_mpeg->future_ref[residual[5]]->v + motion[5],
        encoder_mpeg->tmpblock,
        pitch >> 1,
        (pitch+32) >> 1);
  dct_(encoder_mpeg->tmpblock);
  quantize_(encoder_mpeg->blocks[5],
            encoder_mpeg->tmpblock,
            encoder_mpeg->niqmatrixes[q],
            encoder_mpeg->niqround[q]);
}

/*
 * mpeg_leave
 *
 * Description:
 *   End the encoding of a picture. Runs arch_leave_state(), which is the
 *   "emms" instruction when built with HAS_MMX and nothing otherwise.
 *
 * Arguments:
 *   fame_encoder_t *encoder: the encoder (not used)
 *
 * Return value:
 *   None.
 */
static void mpeg_leave(fame_encoder_t *encoder)
{
  (void) encoder;
  arch_leave_state();
}

/*
 * mpeg_close
 *
 * Description:
 *   Release the encoder: frees the padded shape buffer allocated by
 *   mpeg_init and clears the pointer so that closing twice is harmless.
 *
 * Arguments:
 *   fame_encoder_t *encoder: the encoder
 *
 * Return value:
 *   None.
 */
static void mpeg_close(fame_encoder_t *encoder)
{
  fame_encoder_mpeg_t *encoder_mpeg = FAME_ENCODER_MPEG(encoder);

  /* free shape padding buffer */
  free(encoder_mpeg->padded);
  encoder_mpeg->padded = NULL;
}