}
}
+/**
+ * memset(block, 0, sizeof(DCTELEM)*64)
+ *
+ * Zero a single block of 64 coefficients.
+ */
+static void clear_block_c(DCTELEM *block)
+{
+    memset(block, 0, 64 * sizeof(*block));
+}
+
/**
* memset(blocks, 0, sizeof(DCTELEM)*6*64)
*/
c->sum_abs_dctelem = sum_abs_dctelem_c;
c->gmc1 = gmc1_c;
c->gmc = ff_gmc_c;
+ c->clear_block = clear_block_c;
c->clear_blocks = clear_blocks_c;
c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c;
*/
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+ void (*clear_block)(DCTELEM *block/*align 16*/);
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(uint8_t * pix, int line_size);
int (*pix_norm1)(uint8_t * pix, int line_size);
for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
s->block_last_index[i]= -1;
- memset(s->block[i], 0, sizeof(DCTELEM)*64);
+ s->dsp.clear_block(s->block[i]);
}
}
}else{
for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
s->block_last_index[i]= -1;
- memset(s->block[i], 0, sizeof(DCTELEM)*64);
+ s->dsp.clear_block(s->block[i]);
}
}
}else{
rl = &rl_intra_aic;
i = 0;
s->gb= gb;
- memset(block, 0, sizeof(DCTELEM)*64);
+ s->dsp.clear_block(block);
goto retry;
}
av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra);
);
}
-static void clear_blocks_mmx(DCTELEM *blocks)
+/**
+ * Define an MMX function `name` that zeroes 128*n bytes starting at blocks,
+ * i.e. n blocks of 64 coefficients when DCTELEM is 16 bits wide
+ * (NOTE(review): the width of DCTELEM is not visible here -- confirm).
+ *
+ * Operand %0 points one past the end of the area and REG_a runs from
+ * -128*n up to 0 in steps of 32 bytes (four 8-byte movq stores per
+ * iteration); "js" keeps looping while the offset is still negative.
+ *
+ * The "memory" clobber tells the compiler that the asm writes through
+ * blocks, as clear_block_sse already does. The macro argument n is
+ * parenthesized so that an expression argument expands correctly.
+ */
+#define CLEAR_BLOCKS(name,n) \
+static void name(DCTELEM *blocks)\
+{\
+    __asm__ volatile(\
+        "pxor %%mm7, %%mm7 \n\t"\
+        "mov %1, %%"REG_a" \n\t"\
+        "1: \n\t"\
+        "movq %%mm7, (%0, %%"REG_a") \n\t"\
+        "movq %%mm7, 8(%0, %%"REG_a") \n\t"\
+        "movq %%mm7, 16(%0, %%"REG_a") \n\t"\
+        "movq %%mm7, 24(%0, %%"REG_a") \n\t"\
+        "add $32, %%"REG_a" \n\t"\
+        " js 1b \n\t"\
+        : : "r" (((uint8_t *)blocks)+128*(n)),\
+            "i" (-128*(n))\
+        : "%"REG_a, "memory"\
+    );\
+}
+CLEAR_BLOCKS(clear_blocks_mmx, 6)
+CLEAR_BLOCKS(clear_block_mmx, 1)
+
+static void clear_block_sse(DCTELEM *block) /* zero the 128 bytes at block with SSE stores */
{
__asm__ volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "mov $-128*6, %%"REG_a" \n\t"
- "1: \n\t"
- "movq %%mm7, (%0, %%"REG_a") \n\t"
- "movq %%mm7, 8(%0, %%"REG_a") \n\t"
- "movq %%mm7, 16(%0, %%"REG_a") \n\t"
- "movq %%mm7, 24(%0, %%"REG_a") \n\t"
- "add $32, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "r" (((uint8_t *)blocks)+128*6)
- : "%"REG_a
- );
+ "xorps %%xmm0, %%xmm0 \n" /* xmm0 = all zero bits */
+ "movaps %%xmm0, (%0) \n" /* eight unrolled 16-byte stores cover offsets 0..127 */
+ "movaps %%xmm0, 16(%0) \n" /* movaps is the aligned form: block must be 16-byte aligned */
+ "movaps %%xmm0, 32(%0) \n"
+ "movaps %%xmm0, 48(%0) \n"
+ "movaps %%xmm0, 64(%0) \n"
+ "movaps %%xmm0, 80(%0) \n"
+ "movaps %%xmm0, 96(%0) \n"
+ "movaps %%xmm0, 112(%0) \n"
+ :: "r"(block) /* %0 = start of the block; no outputs */
+ : "memory" /* the asm writes through block, so the compiler must not cache its contents */
+ );
}
static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
c->put_pixels_clamped = put_pixels_clamped_mmx;
c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
c->add_pixels_clamped = add_pixels_clamped_mmx;
+ c->clear_block = clear_block_mmx;
c->clear_blocks = clear_blocks_mmx;
+ if (mm_flags & FF_MM_SSE)
+ c->clear_block = clear_block_sse;
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
int sign;
assert(w->orient<12);
- memset(s->block[0],0x00,64*sizeof(DCTELEM));
+ s->dsp.clear_block(s->block[0]);
if(chroma){
dc_mode=2;
DCTELEM *block = ctx->dct_block;
unsigned int pos;
- memset(block, 0, 64 * sizeof(DCTELEM));
+ ctx->dsp.clear_block(block);
block[0] = get_bits(&ctx->gb, 8) << 3;
int dc_index, int16_t *quant_matrix, int Al)
{
int val;
- memset(block, 0, 64*sizeof(DCTELEM));
+ s->dsp.clear_block(block);
val = mjpeg_decode_dc(s, dc_index);
if (val == 0xffff) {
av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
if(s->interlaced && s->bottom_field)
ptr += linesize[c] >> 1;
if(!s->progressive) {
- memset(s->block, 0, sizeof(s->block));
+ s->dsp.clear_block(s->block);
if(decode_block(s, s->block, i,
s->dc_index[i], s->ac_index[i],
s->quant_matrixes[ s->quant_index[c] ]) < 0) {
/* dequantize the DCT coefficients */
if(s->avctx->idct_algo==FF_IDCT_VP3){
Coeff *coeff= s->coeffs + i;
- memset(block, 0, sizeof(block));
+ s->dsp.clear_block(block);
while(coeff->next){
block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
coeff= coeff->next;
}
}else{
Coeff *coeff= s->coeffs + i;
- memset(block, 0, sizeof(block));
+ s->dsp.clear_block(block);
while(coeff->next){
block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
coeff= coeff->next;
mb_type = vp56_decode_mv(s, row, col);
ref_frame = vp56_reference_frame[mb_type];
- memset(s->block_coeff, 0, sizeof(s->block_coeff));
+ s->dsp.clear_blocks(*s->block_coeff);
s->parse_coeff(s);
case 1:
ff_simple_idct84_add(dst , stride, block1);
ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
- memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+ s->dsp.clear_block(w->abt_block2[n]);
break;
case 2:
ff_simple_idct48_add(dst , stride, block1);
ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]);
- memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+ s->dsp.clear_block(w->abt_block2[n]);
break;
default:
av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n");