libavcodec/ppc/h264_altivec.c

   1 /*
   2  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "../dsputil.h"
  22
  23 #include "gcc_fixes.h"
  24
  25 #include "dsputil_altivec.h"
  26
  /*
   * Store operations handed to h264_template_altivec.c under the name
   * OP_U8_ALTIVEC.
   *   d   - lvalue that receives the result
   *   s   - newly computed value
   *   dst - third operand; only the averaging form reads it
   * Arguments and the whole expansion are parenthesised so the macros behave
   * like ordinary expressions wherever they are used.
   */
  #define PUT_OP_U8_ALTIVEC(d, s, dst) ((d) = (s))
  #define AVG_OP_U8_ALTIVEC(d, s, dst) ((d) = vec_avg((dst), (s)))
  29
  30 #define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
  31 #define PREFIX_h264_chroma_mc8_altivec         put_h264_chroma_mc8_altivec
  32 #define PREFIX_h264_chroma_mc8_num             altivec_put_h264_chroma_mc8_num
  33 #define PREFIX_h264_qpel16_h_lowpass_altivec   put_h264_qpel16_h_lowpass_altivec
  34 #define PREFIX_h264_qpel16_h_lowpass_num       altivec_put_h264_qpel16_h_lowpass_num
  35 #define PREFIX_h264_qpel16_v_lowpass_altivec   put_h264_qpel16_v_lowpass_altivec
  36 #define PREFIX_h264_qpel16_v_lowpass_num       altivec_put_h264_qpel16_v_lowpass_num
  37 #define PREFIX_h264_qpel16_hv_lowpass_altivec  put_h264_qpel16_hv_lowpass_altivec
  38 #define PREFIX_h264_qpel16_hv_lowpass_num      altivec_put_h264_qpel16_hv_lowpass_num
  39 #include "h264_template_altivec.c"
  40 #undef OP_U8_ALTIVEC
  41 #undef PREFIX_h264_chroma_mc8_altivec
  42 #undef PREFIX_h264_chroma_mc8_num
  43 #undef PREFIX_h264_qpel16_h_lowpass_altivec
  44 #undef PREFIX_h264_qpel16_h_lowpass_num
  45 #undef PREFIX_h264_qpel16_v_lowpass_altivec
  46 #undef PREFIX_h264_qpel16_v_lowpass_num
  47 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
  48 #undef PREFIX_h264_qpel16_hv_lowpass_num
  49
  50 #define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
  51 #define PREFIX_h264_chroma_mc8_altivec         avg_h264_chroma_mc8_altivec
  52 #define PREFIX_h264_chroma_mc8_num             altivec_avg_h264_chroma_mc8_num
  53 #define PREFIX_h264_qpel16_h_lowpass_altivec   avg_h264_qpel16_h_lowpass_altivec
  54 #define PREFIX_h264_qpel16_h_lowpass_num       altivec_avg_h264_qpel16_h_lowpass_num
  55 #define PREFIX_h264_qpel16_v_lowpass_altivec   avg_h264_qpel16_v_lowpass_altivec
  56 #define PREFIX_h264_qpel16_v_lowpass_num       altivec_avg_h264_qpel16_v_lowpass_num
  57 #define PREFIX_h264_qpel16_hv_lowpass_altivec  avg_h264_qpel16_hv_lowpass_altivec
  58 #define PREFIX_h264_qpel16_hv_lowpass_num      altivec_avg_h264_qpel16_hv_lowpass_num
  59 #include "h264_template_altivec.c"
  60 #undef OP_U8_ALTIVEC
  61 #undef PREFIX_h264_chroma_mc8_altivec
  62 #undef PREFIX_h264_chroma_mc8_num
  63 #undef PREFIX_h264_qpel16_h_lowpass_altivec
  64 #undef PREFIX_h264_qpel16_h_lowpass_num
  65 #undef PREFIX_h264_qpel16_v_lowpass_altivec
  66 #undef PREFIX_h264_qpel16_v_lowpass_num
  67 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
  68 #undef PREFIX_h264_qpel16_hv_lowpass_num
  69
  /*
   * H264_MC(OPNAME, SIZE, CODETYPE)
   *
   * Expands to sixteen static functions named
   *     <OPNAME>h264_qpel<SIZE>_mcXY_<CODETYPE>(uint8_t *dst, uint8_t *src, int stride)
   * for X, Y in 0..3, listed below with X varying fastest.
   *
   * Building blocks used by the variants:
   *   - <OPNAME>pixels<SIZE>_<CODETYPE>           : mc00 only
   *   - *_h_lowpass_ / *_v_lowpass_ / *_hv_lowpass_ : the three filter helpers
   *   - <OPNAME>pixels<SIZE>_l2_<CODETYPE>        : combines two sources into dst
   *
   * Intermediate planes are always produced with the put_ helpers into packed
   * SIZE x SIZE stack buffers (row pitch SIZE); OPNAME only affects the final
   * write into dst.  mc20, mc02 and mc22 need no intermediate plane and call the
   * OPNAME flavour of the filter directly.  The hv filter additionally needs an
   * int16_t scratch area of SIZE*(SIZE+8) elements.
   */
  #define H264_MC(OPNAME, SIZE, CODETYPE) \
  static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride)\
  {\
      OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, lowpass[SIZE*SIZE]);\
      put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(lowpass, src, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, lowpass, stride, stride, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, lowpass[SIZE*SIZE]);\
      put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(lowpass, src, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, lowpass, stride, stride, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, lowpass[SIZE*SIZE]);\
      put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(lowpass, src, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, lowpass, stride, stride, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, horiz[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(uint8_t, vert[SIZE*SIZE]);\
      put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(horiz, src, SIZE, stride);\
      put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vert, src, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, horiz, vert, stride, SIZE, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, horiz[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(uint8_t, center[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(int16_t, scratch[SIZE*(SIZE+8)]);\
      put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(horiz, src, SIZE, stride);\
      put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(center, scratch, src, SIZE, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, horiz, center, stride, SIZE, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, horiz[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(uint8_t, vert[SIZE*SIZE]);\
      put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(horiz, src, SIZE, stride);\
      put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vert, src+1, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, horiz, vert, stride, SIZE, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, vert[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(uint8_t, center[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(int16_t, scratch[SIZE*(SIZE+8)]);\
      put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vert, src, SIZE, stride);\
      put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(center, scratch, src, SIZE, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, vert, center, stride, SIZE, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(int16_t, scratch[SIZE*(SIZE+8)]);\
      OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, scratch, src, stride, SIZE, stride);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, vert[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(uint8_t, center[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(int16_t, scratch[SIZE*(SIZE+8)]);\
      put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vert, src+1, SIZE, stride);\
      put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(center, scratch, src, SIZE, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, vert, center, stride, SIZE, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, lowpass[SIZE*SIZE]);\
      put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(lowpass, src, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, lowpass, stride, stride, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, horiz[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(uint8_t, vert[SIZE*SIZE]);\
      put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(horiz, src + stride, SIZE, stride);\
      put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vert, src, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, horiz, vert, stride, SIZE, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, horiz[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(uint8_t, center[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(int16_t, scratch[SIZE*(SIZE+8)]);\
      put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(horiz, src + stride, SIZE, stride);\
      put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(center, scratch, src, SIZE, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, horiz, center, stride, SIZE, SIZE);\
  }\
  \
  static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
  {\
      DECLARE_ALIGNED_16(uint8_t, horiz[SIZE*SIZE]);\
      DECLARE_ALIGNED_16(uint8_t, vert[SIZE*SIZE]);\
      put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(horiz, src + stride, SIZE, stride);\
      put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vert, src+1, SIZE, stride);\
      OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, horiz, vert, stride, SIZE, SIZE);\
  }
 179
 180 /* this code assume that stride % 16 == 0 */
 181 void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
 182     signed int ABCD[4] __attribute__((aligned(16))) =
 183                         {((8 - x) * (8 - y)),
 184                           ((x) * (8 - y)),
 185                           ((8 - x) * (y)),
 186                           ((x) * (y))};
 187     register int i;
 188     vector unsigned char fperm;
 189     const vector signed int vABCD = vec_ld(0, ABCD);
 190     const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
 191     const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
 192     const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
 193     const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
 194     const vector signed int vzero = vec_splat_s32(0);
 195     const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
 196     const vector unsigned short v6us = vec_splat_u16(6);
 197     register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
 198     register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
 199
 200     vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
 201     vector unsigned char vsrc0uc, vsrc1uc;
 202     vector signed short vsrc0ssH, vsrc1ssH;
 203     vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
 204     vector signed short vsrc2ssH, vsrc3ssH, psum;
 205     vector unsigned char vdst, ppsum, vfdst, fsum;
 206
 207     if (((unsigned long)dst) % 16 == 0) {
 208       fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
 209                                         0x14, 0x15, 0x16, 0x17,
 210                                         0x08, 0x09, 0x0A, 0x0B,
 211                                         0x0C, 0x0D, 0x0E, 0x0F);
 212     } else {
 213       fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
 214                                         0x04, 0x05, 0x06, 0x07,
 215                                         0x18, 0x19, 0x1A, 0x1B,
 216                                         0x1C, 0x1D, 0x1E, 0x1F);
 217     }
 218
 219     vsrcAuc = vec_ld(0, src);
 220
 221     if (loadSecond)
 222       vsrcBuc = vec_ld(16, src);
 223     vsrcperm0 = vec_lvsl(0, src);
 224     vsrcperm1 = vec_lvsl(1, src);
 225
 226     vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
 227     if (reallyBadAlign)
 228       vsrc1uc = vsrcBuc;
 229     else
 230       vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
 231
 232     vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
 233                                                (vector unsigned char)vsrc0uc);
 234     vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
 235                                                (vector unsigned char)vsrc1uc);
 236
 237     if (!loadSecond) {// -> !reallyBadAlign
 238       for (i = 0 ; i < h ; i++) {
 239
 240
 241         vsrcCuc = vec_ld(stride + 0, src);
 242
 243         vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
 244         vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
 245
 246         vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
 247                                                 (vector unsigned char)vsrc2uc);
 248         vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
 249                                                 (vector unsigned char)vsrc3uc);
 250
 251         psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
 252         psum = vec_mladd(vB, vsrc1ssH, psum);
 253         psum = vec_mladd(vC, vsrc2ssH, psum);
 254         psum = vec_mladd(vD, vsrc3ssH, psum);
 255         psum = vec_add(v28ss, psum);
 256         psum = vec_sra(psum, v6us);
 257
 258         vdst = vec_ld(0, dst);
 259         ppsum = (vector unsigned char)vec_packsu(psum, psum);
 260         fsum = vec_perm(vdst, ppsum, fperm);
 261
 262         vec_st(fsum, 0, dst);
 263
 264         vsrc0ssH = vsrc2ssH;
 265         vsrc1ssH = vsrc3ssH;
 266
 267         dst += stride;
 268         src += stride;
 269       }
 270     } else {
 271         vector unsigned char vsrcDuc;
 272       for (i = 0 ; i < h ; i++) {
 273         vsrcCuc = vec_ld(stride + 0, src);
 274         vsrcDuc = vec_ld(stride + 16, src);
 275
 276         vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
 277         if (reallyBadAlign)
 278           vsrc3uc = vsrcDuc;
 279         else
 280           vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
 281
 282         vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
 283                                                 (vector unsigned char)vsrc2uc);
 284         vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
 285                                                 (vector unsigned char)vsrc3uc);
 286
 287         psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
 288         psum = vec_mladd(vB, vsrc1ssH, psum);
 289         psum = vec_mladd(vC, vsrc2ssH, psum);
 290         psum = vec_mladd(vD, vsrc3ssH, psum);
 291         psum = vec_add(v28ss, psum);
 292         psum = vec_sr(psum, v6us);
 293
 294         vdst = vec_ld(0, dst);
 295         ppsum = (vector unsigned char)vec_pack(psum, psum);
 296         fsum = vec_perm(vdst, ppsum, fperm);
 297
 298         vec_st(fsum, 0, dst);
 299
 300         vsrc0ssH = vsrc2ssH;
 301         vsrc1ssH = vsrc3ssH;
 302
 303         dst += stride;
 304         src += stride;
 305       }
 306     }
 307 }
 308
 309 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
 310                                     const uint8_t * src2, int dst_stride,
 311                                     int src_stride1, int h)
 312 {
 313     int i;
 314     vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
 315
 316     mask_ = vec_lvsl(0, src2);
 317
 318     for (i = 0; i < h; i++) {
 319
 320         tmp1 = vec_ld(i * src_stride1, src1);
 321         mask = vec_lvsl(i * src_stride1, src1);
 322         tmp2 = vec_ld(i * src_stride1 + 15, src1);
 323
 324         a = vec_perm(tmp1, tmp2, mask);
 325
 326         tmp1 = vec_ld(i * 16, src2);
 327         tmp2 = vec_ld(i * 16 + 15, src2);
 328
 329         b = vec_perm(tmp1, tmp2, mask_);
 330
 331         tmp1 = vec_ld(0, dst);
 332         mask = vec_lvsl(0, dst);
 333         tmp2 = vec_ld(15, dst);
 334
 335         d = vec_avg(a, b);
 336
 337         edges = vec_perm(tmp2, tmp1, mask);
 338
 339         align = vec_lvsr(0, dst);
 340
 341         tmp2 = vec_perm(d, edges, align);
 342         tmp1 = vec_perm(edges, d, align);
 343
 344         vec_st(tmp2, 15, dst);
 345         vec_st(tmp1, 0 , dst);
 346
 347         dst += dst_stride;
 348     }
 349 }
 350
 351 static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
 352                                     const uint8_t * src2, int dst_stride,
 353                                     int src_stride1, int h)
 354 {
 355     int i;
 356     vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
 357
 358     mask_ = vec_lvsl(0, src2);
 359
 360     for (i = 0; i < h; i++) {
 361
 362         tmp1 = vec_ld(i * src_stride1, src1);
 363         mask = vec_lvsl(i * src_stride1, src1);
 364         tmp2 = vec_ld(i * src_stride1 + 15, src1);
 365
 366         a = vec_perm(tmp1, tmp2, mask);
 367
 368         tmp1 = vec_ld(i * 16, src2);
 369         tmp2 = vec_ld(i * 16 + 15, src2);
 370
 371         b = vec_perm(tmp1, tmp2, mask_);
 372
 373         tmp1 = vec_ld(0, dst);
 374         mask = vec_lvsl(0, dst);
 375         tmp2 = vec_ld(15, dst);
 376
 377         d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
 378
 379         edges = vec_perm(tmp2, tmp1, mask);
 380
 381         align = vec_lvsr(0, dst);
 382
 383         tmp2 = vec_perm(d, edges, align);
 384         tmp1 = vec_perm(edges, d, align);
 385
 386         vec_st(tmp2, 15, dst);
 387         vec_st(tmp1, 0 , dst);
 388
 389         dst += dst_stride;
 390     }
 391 }
 392
 393 /* Implemented but could be faster
 394 #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
 395 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
 396  */
 397
 398   H264_MC(put_, 16, altivec)
 399   H264_MC(avg_, 16, altivec)
 400
 401 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
 402
 403 #ifdef HAVE_ALTIVEC
 404   if (has_altivec()) {
 405     c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
 406     c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec;
 407     c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
 408
 409 #define dspfunc(PFX, IDX, NUM) \
 410     c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
 411     c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
 412     c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
 413     c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
 414     c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
 415     c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
 416     c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
 417     c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
 418     c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
 419     c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
 420     c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
 421     c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
 422     c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
 423     c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
 424     c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
 425     c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
 426
 427     dspfunc(put_h264_qpel, 0, 16);
 428     dspfunc(avg_h264_qpel, 0, 16);
 429 #undef dspfunc
 430
 431   } else
 432 #endif /* HAVE_ALTIVEC */
 433   {
 434     // Non-AltiVec PPC optimisations
 435
 436     // ... pending ...
 437   }
 438 }