asm="$2"
shift 2
check_cc "$@" <<EOF && enable $name || disable $name
-int foo(void){ asm volatile($asm); }
+int foo(void){ __asm__ volatile($asm); }
EOF
}
# base pointer is cleared in the inline assembly code.
check_exec_crash <<EOF && enable ebp_available
volatile int i=0;
- asm volatile (
+ __asm__ volatile (
"xorl %%ebp, %%ebp"
::: "%ebp");
return i;
# Find out if the .align argument is a power of two or not.
if test $asmalign_pot = "unknown"; then
disable asmalign_pot
- echo 'asm (".align 3");' | check_cc && enable asmalign_pot
+ echo '__asm__ (".align 3");' | check_cc && enable asmalign_pot
fi
enabled_any $DECODER_LIST && enable decoders
General Tips:
-------------
Use asm loops like:
-asm(
+__asm__(
"1: ....
...
"jump_instruction ....
Do not use C loops:
do{
- asm(
+ __asm__(
...
}while()
-Use asm() instead of intrinsics. The latter requires a good optimizing compiler
+Use __asm__() instead of intrinsics. The latter require a good optimizing compiler
which gcc is not.
#define implver __builtin_alpha_implver
#define rpcc __builtin_alpha_rpcc
#else
-#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
-#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; })
-#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
+#define prefetch(p) __asm__ volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_en(p) __asm__ volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_m(p) __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define extql(a, b) ({ uint64_t __r; __asm__ ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define extwl(a, b) ({ uint64_t __r; __asm__ ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define extqh(a, b) ({ uint64_t __r; __asm__ ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define zap(a, b) ({ uint64_t __r; __asm__ ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define amask(a) ({ uint64_t __r; __asm__ ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
+#define implver() ({ uint64_t __r; __asm__ ("implver %0" : "=r" (__r)); __r; })
+#define rpcc() ({ uint64_t __r; __asm__ volatile ("rpcc %0" : "=r" (__r)); __r; })
#endif
-#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
+#define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory")
#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
#define minub8 __builtin_alpha_minub8
#define unpkbl __builtin_alpha_unpkbl
#define unpkbw __builtin_alpha_unpkbw
#else
-#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
-#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
-#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
-#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
-#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
+#define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define perr(a, b) ({ uint64_t __r; __asm__ (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
+#define pklb(a) ({ uint64_t __r; __asm__ (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
+#define pkwb(a) ({ uint64_t __r; __asm__ (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
+#define unpkbl(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
+#define unpkbw(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#endif
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
#define ldl(p) (*(const int32_t *) (p))
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0)
-#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
+#define ldq_u(a) __asm__ ("ldq_u %v0,0(%a0)", a)
#define uldq(a) (*(const __unaligned uint64_t *) (a))
-#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
-#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b)
-#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b)
-#define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b)
-#define zap(a, b) asm ("zap %a0,%a1,%v0", a, b)
-#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b)
-#define amask(a) asm ("amask %a0,%v0", a)
-#define implver() asm ("implver %v0")
-#define rpcc() asm ("rpcc %v0")
-#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b)
-#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b)
-#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b)
-#define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b)
-#define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b)
-#define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b)
-#define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b)
-#define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b)
-#define perr(a, b) asm ("perr %a0,%a1,%v0", a, b)
-#define pklb(a) asm ("pklb %a0,%v0", a)
-#define pkwb(a) asm ("pkwb %a0,%v0", a)
-#define unpkbl(a) asm ("unpkbl %a0,%v0", a)
-#define unpkbw(a) asm ("unpkbw %a0,%v0", a)
-#define wh64(a) asm ("wh64 %a0", a)
+#define cmpbge(a, b) __asm__ ("cmpbge %a0,%a1,%v0", a, b)
+#define extql(a, b) __asm__ ("extql %a0,%a1,%v0", a, b)
+#define extwl(a, b) __asm__ ("extwl %a0,%a1,%v0", a, b)
+#define extqh(a, b) __asm__ ("extqh %a0,%a1,%v0", a, b)
+#define zap(a, b) __asm__ ("zap %a0,%a1,%v0", a, b)
+#define zapnot(a, b) __asm__ ("zapnot %a0,%a1,%v0", a, b)
+#define amask(a) __asm__ ("amask %a0,%v0", a)
+#define implver() __asm__ ("implver %v0")
+#define rpcc() __asm__ ("rpcc %v0")
+#define minub8(a, b) __asm__ ("minub8 %a0,%a1,%v0", a, b)
+#define minsb8(a, b) __asm__ ("minsb8 %a0,%a1,%v0", a, b)
+#define minuw4(a, b) __asm__ ("minuw4 %a0,%a1,%v0", a, b)
+#define minsw4(a, b) __asm__ ("minsw4 %a0,%a1,%v0", a, b)
+#define maxub8(a, b) __asm__ ("maxub8 %a0,%a1,%v0", a, b)
+#define maxsb8(a, b) __asm__ ("maxsb8 %a0,%a1,%v0", a, b)
+#define maxuw4(a, b) __asm__ ("maxuw4 %a0,%a1,%v0", a, b)
+#define maxsw4(a, b) __asm__ ("maxsw4 %a0,%a1,%v0", a, b)
+#define perr(a, b) __asm__ ("perr %a0,%a1,%v0", a, b)
+#define pklb(a) __asm__ ("pklb %a0,%v0", a)
+#define pkwb(a) __asm__ ("pkwb %a0,%v0", a)
+#define unpkbl(a) __asm__ ("unpkbl %a0,%v0", a)
+#define unpkbw(a) __asm__ ("unpkbw %a0,%v0", a)
+#define wh64(a) __asm__ ("wh64 %a0", a)
#else
#error "Unknown compiler!"
static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size)
{
- asm volatile (
+ __asm__ volatile (
"mov r10, #8 \n\t"
"1: \n\t"
#ifdef HAVE_ARMV5TE
static void prefetch_arm(void *mem, int stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"subs %0, %0, #1 \n\t"
"pld [%1] \n\t"
#include "libavcodec/dsputil.h"
#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
-#define SET_RND(regd) asm volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
+#define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
#define WAVG2B "wavg2b"
#include "dsputil_iwmmxt_rnd.h"
#undef DEF
#undef WAVG2B
#define DEF(x, y) x ## _ ## y ##_iwmmxt
-#define SET_RND(regd) asm volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
+#define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
#define WAVG2B "wavg2br"
#include "dsputil_iwmmxt_rnd.h"
#undef DEF
// need scheduling
#define OP(AVG) \
- asm volatile ( \
+ __asm__ volatile ( \
/* alignment */ \
"and r12, %[pixels], #7 \n\t" \
"bic %[pixels], %[pixels], #7 \n\t" \
{
uint8_t *pixels2 = pixels + line_size;
- asm volatile (
+ __asm__ volatile (
"mov r12, #4 \n\t"
"1: \n\t"
"pld [%[pixels], %[line_size2]] \n\t"
static void clear_blocks_iwmmxt(DCTELEM *blocks)
{
- asm volatile(
+ __asm__ volatile(
"wzero wr0 \n\t"
"mov r1, #(128 * 6 / 32) \n\t"
"1: \n\t"
void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
int stride = line_size;
- asm volatile (
+ __asm__ volatile (
"and r12, %[pixels], #7 \n\t"
"bic %[pixels], %[pixels], #7 \n\t"
"tmcr wcgr1, r12 \n\t"
void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
int stride = line_size;
- asm volatile (
+ __asm__ volatile (
"and r12, %[pixels], #7 \n\t"
"bic %[pixels], %[pixels], #7 \n\t"
"tmcr wcgr1, r12 \n\t"
void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
int stride = line_size;
- asm volatile (
+ __asm__ volatile (
"and r12, %[pixels], #7 \n\t"
"bic %[pixels], %[pixels], #7 \n\t"
"tmcr wcgr1, r12 \n\t"
void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
int stride = line_size;
- asm volatile (
+ __asm__ volatile (
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"pld [%[block]] \n\t"
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t"
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t"
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"pld [%[block]] \n\t"
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"pld [%[block]] \n\t"
int stride = line_size;
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
- asm volatile(
+ __asm__ volatile(
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t"
int stride = line_size;
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
- asm volatile(
+ __asm__ volatile(
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t"
int stride = line_size;
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
- asm volatile(
+ __asm__ volatile(
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t"
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"pld [%[pixels]] \n\t"
"mov r12, #2 \n\t"
"pld [%[pixels], #32] \n\t"
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"pld [%[pixels]] \n\t"
"mov r12, #2 \n\t"
"pld [%[pixels], #32] \n\t"
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"pld [%[block]] \n\t"
"pld [%[block], #32] \n\t"
"pld [%[pixels]] \n\t"
// [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"pld [%[block]] \n\t"
"pld [%[block], #32] \n\t"
"pld [%[pixels]] \n\t"
static void vector_fmul_vfp(float *dst, const float *src, int len)
{
int tmp;
- asm volatile(
+ __asm__ volatile(
"fmrx %[tmp], fpscr\n\t"
"orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
"fmxr fpscr, %[tmp]\n\t"
static void vector_fmul_reverse_vfp(float *dst, const float *src0, const float *src1, int len)
{
src1 += len;
- asm volatile(
+ __asm__ volatile(
"fldmdbs %[src1]!, {s0-s3}\n\t"
"fldmias %[src0]!, {s8-s11}\n\t"
"fldmdbs %[src1]!, {s4-s7}\n\t"
*/
void float_to_int16_vfp(int16_t *dst, const float *src, int len)
{
- asm volatile(
+ __asm__ volatile(
"fldmias %[src]!, {s16-s23}\n\t"
"ftosis s0, s16\n\t"
"ftosis s1, s17\n\t"
#ifdef FRAC_BITS
# define MULL(a, b) \
({ int lo, hi;\
- asm("smull %0, %1, %2, %3 \n\t"\
+ __asm__("smull %0, %1, %2, %3 \n\t"\
"mov %0, %0, lsr %4\n\t"\
"add %1, %0, %1, lsl %5\n\t"\
: "=&r"(lo), "=&r"(hi)\
static inline av_const int MULH(int a, int b)
{
int r;
- asm ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
+ __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
return r;
}
#define MULH MULH
#else
#define MULH(a, b) \
({ int lo, hi;\
- asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
+ __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
hi; })
#endif
static inline av_const int64_t MUL64(int a, int b)
{
union { uint64_t x; unsigned hl[2]; } x;
- asm ("smull %0, %1, %2, %3"
+ __asm__ ("smull %0, %1, %2, %3"
: "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b));
return x.x;
}
static inline av_const int64_t MAC64(int64_t d, int a, int b)
{
union { uint64_t x; unsigned hl[2]; } x = { d };
- asm ("smlal %0, %1, %2, %3"
+ __asm__ ("smlal %0, %1, %2, %3"
: "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b));
return x.x;
}
/* signed 16x16 -> 32 multiply add accumulate */
# define MAC16(rt, ra, rb) \
- asm ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+ __asm__ ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
/* signed 16x16 -> 32 multiply */
# define MUL16(ra, rb) \
({ int __rt; \
- asm ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
+ __asm__ ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
__rt; })
#endif
({ DCTELEM *xblock = xxblock; \
int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \
int xdata1, xdata2; \
-asm volatile( \
+__asm__ volatile( \
"subs %[count], %[count], #2 \n\t" \
"ble 2f \n\t" \
"ldrd r4, [%[block], #0] \n\t" \
else
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
- asm volatile (
+ __asm__ volatile (
/* "movd %1, %%mm6 \n\t" //qmul */
/* "packssdw %%mm6, %%mm6 \n\t" */
/* "packssdw %%mm6, %%mm6 \n\t" */
{
// This is just a simple memset.
//
- asm("P0=192; "
+ __asm__("P0=192; "
"I0=%0; "
"R0=0; "
"LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;"
#ifdef CONFIG_MPEGAUDIO_HP
#define MULH(X,Y) ({ int xxo; \
- asm ( \
+ __asm__ ( \
"a1 = %2.L * %1.L (FU);\n\t" \
"a1 = a1 >> 16;\n\t" \
"a1 += %2.H * %1.L (IS,M);\n\t" \
: "=d" (xxo) : "d" (X), "d" (Y) : "A0","A1"); xxo; })
#else
#define MULH(X,Y) ({ int xxo; \
- asm ( \
+ __asm__ ( \
"a1 = %2.H * %1.L (IS,M);\n\t" \
"a0 = %1.H * %2.H, a1+= %1.H * %2.L (IS,M);\n\t"\
"a1 = a1 >>> 16;\n\t" \
/* signed 16x16 -> 32 multiply */
#define MUL16(a, b) ({ int xxo; \
- asm ( \
+ __asm__ ( \
"%0 = %1.l*%2.l (is);\n\t" \
: "=W" (xxo) : "d" (a), "d" (b) : "A1"); \
xxo; })
/* block[i] = level; */
/* } */
- asm volatile
+ __asm__ volatile
("i2=%1;\n\t"
"r1=[%1++]; \n\t"
"r0=r1>>>15 (v); \n\t"
PROF("zzscan",5);
- asm volatile
+ __asm__ volatile
("r0=b[%1--] (x); \n\t"
"lsetup (0f,1f) lc0=%3; \n\t" /* for(i=63; i>=start_i; i--) { */
"0: p0=r0; \n\t" /* j = scantable[i]; */
#if defined(ARCH_X86)
// avoid +32 for shift optimization (gcc should do that ...)
static inline int32_t NEG_SSR32( int32_t a, int8_t s){
- asm ("sarl %1, %0\n\t"
+ __asm__ ("sarl %1, %0\n\t"
: "+r" (a)
: "ic" ((uint8_t)(-s))
);
return a;
}
static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
- asm ("shrl %1, %0\n\t"
+ __asm__ ("shrl %1, %0\n\t"
: "+r" (a)
: "ic" ((uint8_t)(-s))
);
{
# ifdef ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86)
- asm volatile(
+ __asm__ volatile(
"movl %0, %%ecx \n\t"
"xorl %%eax, %%eax \n\t"
"shrdl %%cl, %1, %%eax \n\t"
# endif
# else //ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86)
- asm volatile(
+ __asm__ volatile(
"movl $7, %%ecx \n\t"
"andl %0, %%ecx \n\t"
"addl %3, %%ecx \n\t"
#if defined(ARCH_X86)
# define SKIP_CACHE(name, gb, num)\
- asm(\
+ __asm__(\
"shldl %2, %1, %0 \n\t"\
"shll %2, %1 \n\t"\
: "+r" (name##_cache0), "+r" (name##_cache1)\
int temp;
#if 0
//P3:683 athlon:475
- asm(
+ __asm__(
"lea -0x100(%0), %2 \n\t"
"shr $31, %2 \n\t" //FIXME 31->63 for x86-64
"shl %%cl, %0 \n\t"
);
#elif 0
//P3:680 athlon:474
- asm(
+ __asm__(
"cmp $0x100, %0 \n\t"
"setb %%cl \n\t" //FIXME 31->63 for x86-64
"shl %%cl, %0 \n\t"
#elif 1
int temp2;
//P3:665 athlon:517
- asm(
+ __asm__(
"lea -0x100(%0), %%eax \n\t"
"cltd \n\t"
"mov %0, %%eax \n\t"
#elif 0
int temp2;
//P3:673 athlon:509
- asm(
+ __asm__(
"cmp $0x100, %0 \n\t"
"sbb %%edx, %%edx \n\t"
"mov %0, %%eax \n\t"
#else
int temp2;
//P3:677 athlon:511
- asm(
+ __asm__(
"cmp $0x100, %0 \n\t"
"lea (%0, %0), %%eax \n\t"
"lea (%1, %1), %%edx \n\t"
int bit;
#ifndef BRANCHLESS_CABAC_DECODER
- asm volatile(
+ __asm__ volatile(
"movzbl (%1), %0 \n\t"
"movl "RANGE "(%2), %%ebx \n\t"
"movl "RANGE "(%2), %%edx \n\t"
"add "tmp" , "low" \n\t"\
"1: \n\t"
- asm volatile(
+ __asm__ volatile(
"movl "RANGE "(%2), %%esi \n\t"
"movl "LOW "(%2), %%ebx \n\t"
BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
static int av_unused get_cabac_bypass(CABACContext *c){
#if 0 //not faster
int bit;
- asm volatile(
+ __asm__ volatile(
"movl "RANGE "(%1), %%ebx \n\t"
"movl "LOW "(%1), %%eax \n\t"
"shl $17, %%ebx \n\t"
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
- asm volatile(
+ __asm__ volatile(
"movl "RANGE "(%1), %%ebx \n\t"
"movl "LOW "(%1), %%eax \n\t"
"shl $17, %%ebx \n\t"
{
#ifdef HAVE_MMX
if (cpu_flags & MM_MMX)
- asm volatile ("emms\n\t");
+ __asm__ volatile ("emms\n\t");
#endif
}
static inline void emms(void)
{
- asm volatile ("emms;":::"memory");
+ __asm__ volatile ("emms;":::"memory");
}
static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
{
- asm volatile(
+ __asm__ volatile(
"movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
"movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
"movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
cavs_idct8_1d(block+4*i, ff_pw_4);
- asm volatile(
+ __asm__ volatile(
"psraw $3, %%mm7 \n\t"
"psraw $3, %%mm6 \n\t"
"psraw $3, %%mm5 \n\t"
for(i=0; i<2; i++){
cavs_idct8_1d(b2+4*i, ff_pw_64);
- asm volatile(
+ __asm__ volatile(
"psraw $7, %%mm7 \n\t"
"psraw $7, %%mm6 \n\t"
"psraw $7, %%mm5 \n\t"
add_pixels_clamped_mmx(b2, dst, stride);
/* clear block */
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"movq %%mm7, (%0) \n\t"
"movq %%mm7, 8(%0) \n\t"
src -= 2*srcStride;\
\
while(w--){\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
: "memory"\
);\
if(h==16){\
- asm volatile(\
+ __asm__ volatile(\
VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
#define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=8;\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movq %5, %%mm6 \n\t"\
"1: \n\t"\
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
#define cpuid(index,eax,ebx,ecx,edx)\
- asm volatile\
+ __asm__ volatile\
("mov %%"REG_b", %%"REG_S"\n\t"\
"cpuid\n\t"\
"xchg %%"REG_b", %%"REG_S\
int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
x86_reg a, c;
- asm volatile (
+ __asm__ volatile (
/* See if CPUID instruction is supported ... */
/* ... Get copies of EFLAGS into eax and ecx */
"pushf\n\t"
rnd_reg = rnd ? &ff_pw_4 : &ff_pw_3;
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm5\n\t"
"movq %1, %%mm4\n\t"
"movq %2, %%mm6\n\t" /* mm6 = rnd */
:: "rm"(x+y), "m"(ff_pw_8), "m"(*rnd_reg));
for(i=0; i<h; i++) {
- asm volatile(
+ __asm__ volatile(
/* mm0 = src[0..7], mm1 = src[1..8] */
"movq %0, %%mm0\n\t"
"movq %1, %%mm2\n\t"
:: "m"(src[0]), "m"(src[dxy]));
- asm volatile(
+ __asm__ volatile(
/* [mm0,mm1] = A * src[0..7] */
/* [mm2,mm3] = B * src[1..8] */
"movq %%mm0, %%mm1\n\t"
/* general case, bilinear */
rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28.a;
- asm volatile("movd %2, %%mm4\n\t"
+ __asm__ volatile("movd %2, %%mm4\n\t"
"movd %3, %%mm6\n\t"
"punpcklwd %%mm4, %%mm4\n\t"
"punpcklwd %%mm6, %%mm6\n\t"
"movq %%mm4, %0\n\t"
: "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
- asm volatile(
+ __asm__ volatile(
/* mm0 = src[0..7], mm1 = src[1..8] */
"movq %0, %%mm0\n\t"
"movq %1, %%mm1\n\t"
for(i=0; i<h; i++) {
src += stride;
- asm volatile(
+ __asm__ volatile(
/* mm2 = A * src[0..3] + B * src[1..4] */
/* mm3 = A * src[4..7] + B * src[5..8] */
"movq %%mm0, %%mm2\n\t"
"paddw %%mm0, %%mm3\n\t"
: : "m" (AA));
- asm volatile(
+ __asm__ volatile(
/* [mm2,mm3] += C * src[0..7] */
"movq %0, %%mm0\n\t"
"movq %%mm0, %%mm1\n\t"
"paddw %%mm1, %%mm3\n\t"
: : "m" (src[0]));
- asm volatile(
+ __asm__ volatile(
/* [mm2,mm3] += D * src[1..8] */
"movq %1, %%mm1\n\t"
"movq %%mm1, %%mm0\n\t"
"movq %0, %%mm0\n\t"
: : "m" (src[0]), "m" (src[1]), "m" (DD));
- asm volatile(
+ __asm__ volatile(
/* dst[0..7] = ([mm2,mm3] + 32) >> 6 */
"paddw %1, %%mm2\n\t"
"paddw %1, %%mm3\n\t"
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"movd %5, %%mm2 \n\t"
"movd %6, %%mm3 \n\t"
int tmp = ((1<<16)-1)*x + 8;
int CD= tmp*y;
int AB= (tmp<<3) - CD;
- asm volatile(
+ __asm__ volatile(
/* mm5 = {A,B,A,B} */
/* mm6 = {C,D,C,D} */
"movd %0, %%mm5\n\t"
:: "r"(AB), "r"(CD), "m"(src[0]));
- asm volatile(
+ __asm__ volatile(
"1:\n\t"
"add %4, %1\n\t"
/* mm1 = A * src[0,1] + B * src[1,2] */
if(y==0 || x==0)
{
/* 1 dimensional filter only */
- asm volatile(
+ __asm__ volatile(
"movd %0, %%xmm7 \n\t"
"movq %1, %%xmm6 \n\t"
"pshuflw $0, %%xmm7, %%xmm7 \n\t"
);
if(x) {
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movq (%1), %%xmm0 \n\t"
"movq 1(%1), %%xmm1 \n\t"
:"r"((x86_reg)stride)
);
} else {
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movq (%1), %%xmm0 \n\t"
"movq (%1,%3), %%xmm1 \n\t"
}
/* general case, bilinear */
- asm volatile(
+ __asm__ volatile(
"movd %0, %%xmm7 \n\t"
"movd %1, %%xmm6 \n\t"
"movdqa %2, %%xmm5 \n\t"
:: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28))
);
- asm volatile(
+ __asm__ volatile(
"movq (%1), %%xmm0 \n\t"
"movq 1(%1), %%xmm1 \n\t"
"punpcklbw %%xmm1, %%xmm0 \n\t"
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm7 \n\t"
"movd %1, %%mm6 \n\t"
"movq %2, %%mm5 \n\t"
:: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32)
);
- asm volatile(
+ __asm__ volatile(
"movd (%1), %%mm0 \n\t"
"punpcklbw 1(%1), %%mm0 \n\t"
"add %3, %1 \n\t"
DECLARE_ALIGNED_16(const double, ff_pd_1[2]) = { 1.0, 1.0 };
DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 };
-#define JUMPALIGN() asm volatile (ASMALIGN(3)::)
-#define MOVQ_ZERO(regd) asm volatile ("pxor %%" #regd ", %%" #regd ::)
+#define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
+#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
#define MOVQ_BFE(regd) \
- asm volatile ( \
+ __asm__ volatile ( \
"pcmpeqd %%" #regd ", %%" #regd " \n\t"\
"paddb %%" #regd ", %%" #regd " \n\t" ::)
#ifndef PIC
-#define MOVQ_BONE(regd) asm volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))
-#define MOVQ_WTWO(regd) asm volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))
+#define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))
+#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))
#else
// for shared library it's better to use this way for accessing constants
// pcmpeqd -> -1
#define MOVQ_BONE(regd) \
- asm volatile ( \
+ __asm__ volatile ( \
"pcmpeqd %%" #regd ", %%" #regd " \n\t" \
"psrlw $15, %%" #regd " \n\t" \
"packuswb %%" #regd ", %%" #regd " \n\t" ::)
#define MOVQ_WTWO(regd) \
- asm volatile ( \
+ __asm__ volatile ( \
"pcmpeqd %%" #regd ", %%" #regd " \n\t" \
"psrlw $15, %%" #regd " \n\t" \
"psllw $1, %%" #regd " \n\t"::)
p = block;
pix = pixels;
/* unrolled loop */
- asm volatile(
+ __asm__ volatile(
"movq %3, %%mm0 \n\t"
"movq 8%3, %%mm1 \n\t"
"movq 16%3, %%mm2 \n\t"
// if here would be an exact copy of the code above
// compiler would generate some very strange code
// thus using "r"
- asm volatile(
+ __asm__ volatile(
"movq (%3), %%mm0 \n\t"
"movq 8(%3), %%mm1 \n\t"
"movq 16(%3), %%mm2 \n\t"
MOVQ_ZERO(mm7);
i = 4;
do {
- asm volatile(
+ __asm__ volatile(
"movq (%2), %%mm0 \n\t"
"movq 8(%2), %%mm1 \n\t"
"movq 16(%2), %%mm2 \n\t"
static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3)
"1: \n\t"
static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3)
"1: \n\t"
static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3)
"1: \n\t"
static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movdqu (%1), %%xmm0 \n\t"
"movdqu (%1,%3), %%xmm1 \n\t"
static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movdqu (%1), %%xmm0 \n\t"
"movdqu (%1,%3), %%xmm1 \n\t"
static void clear_blocks_mmx(DCTELEM *blocks)
{
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"mov $-128*6, %%"REG_a" \n\t"
"1: \n\t"
static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
x86_reg i=0;
- asm volatile(
+ __asm__ volatile(
"jmp 2f \n\t"
"1: \n\t"
"movq (%1, %0), %%mm0 \n\t"
static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
x86_reg i=0;
- asm volatile(
+ __asm__ volatile(
"jmp 2f \n\t"
"1: \n\t"
"movq (%2, %0), %%mm0 \n\t"
if(ENABLE_ANY_H263) {
const int strength= ff_h263_loop_filter_strength[qscale];
- asm volatile(
+ __asm__ volatile(
H263_LOOP_FILTER
}
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
- asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
+ __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
"movd %4, %%mm0 \n\t"
"movd %5, %%mm1 \n\t"
"movd %6, %%mm2 \n\t"
transpose4x4(btemp , src , 8, stride);
transpose4x4(btemp+4, src + 4*stride, 8, stride);
- asm volatile(
+ __asm__ volatile(
H263_LOOP_FILTER // 5 3 4 6
: "+m" (temp[0]),
: "g" (2*strength), "m"(ff_pb_FC)
);
- asm volatile(
+ __asm__ volatile(
"movq %%mm5, %%mm1 \n\t"
"movq %%mm4, %%mm0 \n\t"
"punpcklbw %%mm3, %%mm5 \n\t"
ptr = buf;
if(w==8)
{
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
"punpcklbw %%mm0, %%mm0 \n\t"
}
else
{
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
"punpcklbw %%mm0, %%mm0 \n\t"
for(i=0;i<w;i+=4) {
/* top and bottom (and hopefully also the corners) */
ptr= buf - (i + 1) * wrap - w;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movq (%1, %0), %%mm0 \n\t"
"movq %%mm0, (%0) \n\t"
: "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
);
ptr= last_line + (i + 1) * wrap - w;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movq (%1, %0), %%mm0 \n\t"
"movq %%mm0, (%0) \n\t"
{\
x86_reg i = -bpp;\
x86_reg end = w-3;\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n"\
"movd (%1,%0), %%mm0 \n"\
"movd (%2,%0), %%mm1 \n"\
static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint64_t temp;\
\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t" /* ABCDEFGH */\
temp[13]= (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]);\
temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\
temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\
- asm volatile(\
+ __asm__ volatile(\
"movq (%0), %%mm0 \n\t"\
"movq 8(%0), %%mm1 \n\t"\
"paddw %2, %%mm0 \n\t"\
}\
\
static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t" /* ABCDEFGH */\
temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 8]);\
temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\
temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\
- asm volatile(\
+ __asm__ volatile(\
"movq (%0), %%mm0 \n\t"\
"movq 8(%0), %%mm1 \n\t"\
"paddw %2, %%mm0 \n\t"\
int count= 17;\
\
/*FIXME unroll */\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
count=4;\
\
/*FIXME reorder for speed */\
- asm volatile(\
+ __asm__ volatile(\
/*"pxor %%mm7, %%mm7 \n\t"*/\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
int count= 9;\
\
/*FIXME unroll */\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
count=2;\
\
/*FIXME reorder for speed */\
- asm volatile(\
+ __asm__ volatile(\
/*"pxor %%mm7, %%mm7 \n\t"*/\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
src = edge_buf;
}
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm6 \n\t"
"pxor %%mm7, %%mm7 \n\t"
"punpcklwd %%mm6, %%mm6 \n\t"
oys - dyys + dyxs*(x+3) };
for(y=0; y<h; y++){
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm4 \n\t"
"movq %1, %%mm5 \n\t"
"paddw %2, %%mm4 \n\t"
: "m"(*dxy4), "m"(*dyy4)
);
- asm volatile(
+ __asm__ volatile(
"movq %%mm6, %%mm2 \n\t"
"movq %%mm6, %%mm1 \n\t"
"psubw %%mm4, %%mm2 \n\t"
static void name(void *mem, int stride, int h){\
const uint8_t *p= mem;\
do{\
- asm volatile(#op" %0" :: "m"(*p));\
+ __asm__ volatile(#op" %0" :: "m"(*p));\
p+= stride;\
}while(--h);\
}
static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
{
int i;
- asm volatile("pxor %%mm7, %%mm7":);
+ __asm__ volatile("pxor %%mm7, %%mm7":);
for(i=0; i<blocksize; i+=2) {
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
::"memory"
);
}
- asm volatile("femms");
+ __asm__ volatile("femms");
}
static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
{
int i;
- asm volatile(
+ __asm__ volatile(
"movaps %0, %%xmm5 \n\t"
::"m"(ff_pdw_80000000[0])
);
for(i=0; i<blocksize; i+=4) {
- asm volatile(
+ __asm__ volatile(
"movaps %0, %%xmm0 \n\t"
"movaps %1, %%xmm1 \n\t"
"xorps %%xmm2, %%xmm2 \n\t"
#define IF0(x)
#define MIX5(mono,stereo)\
- asm volatile(\
+ __asm__ volatile(\
"movss 0(%2), %%xmm5 \n"\
"movss 8(%2), %%xmm6 \n"\
"movss 24(%2), %%xmm7 \n"\
);
#define MIX_MISC(stereo)\
- asm volatile(\
+ __asm__ volatile(\
"1: \n"\
"movaps (%3,%0), %%xmm0 \n"\
stereo("movaps %%xmm0, %%xmm1 \n")\
} else {
DECLARE_ALIGNED_16(float, matrix_simd[in_ch][2][4]);
j = 2*in_ch*sizeof(float);
- asm volatile(
+ __asm__ volatile(
"1: \n"
"sub $8, %0 \n"
"movss (%2,%0), %%xmm6 \n"
static void vector_fmul_3dnow(float *dst, const float *src, int len){
x86_reg i = (len-4)*4;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movq (%1,%0), %%mm0 \n\t"
"movq 8(%1,%0), %%mm1 \n\t"
}
static void vector_fmul_sse(float *dst, const float *src, int len){
x86_reg i = (len-8)*4;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movaps (%1,%0), %%xmm0 \n\t"
"movaps 16(%1,%0), %%xmm1 \n\t"
static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){
x86_reg i = len*4-16;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"pswapd 8(%1), %%mm0 \n\t"
"pswapd (%1), %%mm1 \n\t"
:"+r"(i), "+r"(src1)
:"r"(dst), "r"(src0)
);
- asm volatile("femms");
+ __asm__ volatile("femms");
}
static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){
x86_reg i = len*4-32;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movaps 16(%1), %%xmm0 \n\t"
"movaps (%1), %%xmm1 \n\t"
x86_reg i = (len-4)*4;
if(step == 2 && src3 == 0){
dst += (len-4)*2;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movq (%2,%0), %%mm0 \n\t"
"movq 8(%2,%0), %%mm1 \n\t"
);
}
else if(step == 1 && src3 == 0){
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movq (%2,%0), %%mm0 \n\t"
"movq 8(%2,%0), %%mm1 \n\t"
}
else
ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
- asm volatile("femms");
+ __asm__ volatile("femms");
}
static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1,
const float *src2, int src3, int len, int step){
x86_reg i = (len-8)*4;
if(step == 2 && src3 == 0){
dst += (len-8)*2;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movaps (%2,%0), %%xmm0 \n\t"
"movaps 16(%2,%0), %%xmm1 \n\t"
);
}
else if(step == 1 && src3 == 0){
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movaps (%2,%0), %%xmm0 \n\t"
"movaps 16(%2,%0), %%xmm1 \n\t"
if(add_bias == 0){
x86_reg i = -len*4;
x86_reg j = len*4-8;
- asm volatile(
+ __asm__ volatile(
"1: \n"
"pswapd (%5,%1), %%mm1 \n"
"movq (%5,%0), %%mm0 \n"
if(add_bias == 0){
x86_reg i = -len*4;
x86_reg j = len*4-16;
- asm volatile(
+ __asm__ volatile(
"1: \n"
"movaps (%5,%1), %%xmm1 \n"
"movaps (%5,%0), %%xmm0 \n"
static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
{
x86_reg i = -4*len;
- asm volatile(
+ __asm__ volatile(
"movss %3, %%xmm4 \n"
"shufps $0, %%xmm4, %%xmm4 \n"
"1: \n"
static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len)
{
x86_reg i = -4*len;
- asm volatile(
+ __asm__ volatile(
"movss %3, %%xmm4 \n"
"shufps $0, %%xmm4, %%xmm4 \n"
"1: \n"
static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){
// not bit-exact: pf2id uses different rounding than C and SSE
- asm volatile(
+ __asm__ volatile(
"add %0 , %0 \n\t"
"lea (%2,%0,2) , %2 \n\t"
"add %0 , %1 \n\t"
);
}
static void float_to_int16_sse(int16_t *dst, const float *src, long len){
- asm volatile(
+ __asm__ volatile(
"add %0 , %0 \n\t"
"lea (%2,%0,2) , %2 \n\t"
"add %0 , %1 \n\t"
}
static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
- asm volatile(
+ __asm__ volatile(
"add %0 , %0 \n\t"
"lea (%2,%0,2) , %2 \n\t"
"add %0 , %1 \n\t"
else if(channels==2){\
const float *src0 = src[0];\
const float *src1 = src[1];\
- asm volatile(\
+ __asm__ volatile(\
"shl $2, %0 \n"\
"add %0, %1 \n"\
"add %0, %2 \n"\
x86_reg o = -(order << 1);
v1 += order;
v2 += order;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movdqu (%1,%2), %%xmm0 \n\t"
"movdqu 16(%1,%2), %%xmm1 \n\t"
x86_reg o = -(order << 1);
v1 += order;
v2 += order;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movdqa (%0,%2), %%xmm0 \n\t"
"movdqa 16(%0,%2), %%xmm2 \n\t"
v1 += order;
v2 += order;
sh = shift;
- asm volatile(
+ __asm__ volatile(
"pxor %%xmm7, %%xmm7 \n\t"
"1: \n\t"
"movdqu (%0,%3), %%xmm0 \n\t"
#endif
#define MOVQ_WONE(regd) \
- asm volatile ( \
+ __asm__ volatile ( \
"pcmpeqd %%" #regd ", %%" #regd " \n\t" \
"psrlw $15, %%" #regd ::)
*/
static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movd (%1), %%mm0 \n\t"
static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"pcmpeqb %%mm6, %%mm6 \n\t"
"testl $1, %0 \n\t"
" jz 1f \n\t"
static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movd (%1), %%mm0 \n\t"
static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"pcmpeqb %%mm6, %%mm6 \n\t"
"testl $1, %0 \n\t"
" jz 1f \n\t"
static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BONE(mm6);
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
"sub %3, %2 \n\t"
static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BONE(mm6);
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
"sub %3, %2 \n\t"
static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t"
"movq (%2), %%mm0 \n\t"
static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
"sub %3, %2 \n\t"
static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BONE(mm6);
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
PAVGB" 1(%1), %%mm0 \n\t"
static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
do {
- asm volatile(
+ __asm__ volatile(
"movd (%1), %%mm0 \n\t"
"movd (%1, %2), %%mm1 \n\t"
"movd (%1, %2, 2), %%mm2 \n\t"
#define QPEL_2TAP_L3(OPNAME) \
static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
- asm volatile(\
+ __asm__ volatile(\
"1: \n\t"\
"movq (%1,%2), %%mm0 \n\t"\
"movq 8(%1,%2), %%mm1 \n\t"\
);\
}\
static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
- asm volatile(\
+ __asm__ volatile(\
"1: \n\t"\
"movq (%1,%2), %%mm0 \n\t"\
PAVGB" (%1,%3), %%mm0 \n\t"\
scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
SET_RND(mm6);
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"movd %4, %%mm5 \n\t"
"punpcklwd %%mm5, %%mm5 \n\t"
if(FFABS(scale) < MAX_ABS){
scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
SET_RND(mm6);
- asm volatile(
+ __asm__ volatile(
"movd %3, %%mm5 \n\t"
"punpcklwd %%mm5, %%mm5 \n\t"
"punpcklwd %%mm5, %%mm5 \n\t"
static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BFE(mm6);
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3)
"1: \n\t"
static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
MOVQ_BFE(mm6);
- asm volatile(
+ __asm__ volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BFE(mm6);
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3)
"1: \n\t"
static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
MOVQ_BFE(mm6);
- asm volatile(
+ __asm__ volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BFE(mm6);
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
ASMALIGN(3)
{
MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm4 \n\t"
"movq %%mm0, %%mm1 \n\t"
MOVQ_BFE(mm6);
JUMPALIGN();
do {
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm0 \n\t"
"movd %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
MOVQ_BFE(mm6);
JUMPALIGN();
do {
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
MOVQ_BFE(mm6);
JUMPALIGN();
do {
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
MOVQ_BFE(mm6);
JUMPALIGN();
do {
- asm volatile(
+ __asm__ volatile(
"movq %1, %%mm0 \n\t"
"movq 1%1, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
MOVQ_BFE(mm6);
JUMPALIGN();
do {
- asm volatile(
+ __asm__ volatile(
"movq %1, %%mm0 \n\t"
"movq %2, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
MOVQ_BFE(mm6);
JUMPALIGN();
do {
- asm volatile(
+ __asm__ volatile(
"movq %1, %%mm0 \n\t"
"movq 1%1, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
MOVQ_BFE(mm6);
JUMPALIGN();
do {
- asm volatile(
+ __asm__ volatile(
"movq %1, %%mm0 \n\t"
"movq %2, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BFE(mm6);
- asm volatile(
+ __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
ASMALIGN(3)
{
MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
- asm volatile(
+ __asm__ volatile(
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm4 \n\t"
"movq %%mm0, %%mm1 \n\t"
static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
{
- asm volatile(
+ __asm__ volatile(
"mov $-128, %%"REG_a" \n\t"
"pxor %%mm7, %%mm7 \n\t"
ASMALIGN(4)
static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size)
{
- asm volatile(
+ __asm__ volatile(
"pxor %%xmm7, %%xmm7 \n\t"
"movq (%0), %%xmm0 \n\t"
"movq (%0, %2), %%xmm1 \n\t"
static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)
{
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"mov $-128, %%"REG_a" \n\t"
ASMALIGN(4)
int sum;
x86_reg index= -line_size*h;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
static int pix_norm1_mmx(uint8_t *pix, int line_size) {
int tmp;
- asm volatile (
+ __asm__ volatile (
"movl $16,%%ecx\n"
"pxor %%mm0,%%mm0\n"
"pxor %%mm7,%%mm7\n"
static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp;
- asm volatile (
+ __asm__ volatile (
"movl %4,%%ecx\n"
"shr $1,%%ecx\n"
"pxor %%mm0,%%mm0\n" /* mm0 = 0 */
static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp;
- asm volatile (
+ __asm__ volatile (
"movl %4,%%ecx\n"
"pxor %%mm0,%%mm0\n" /* mm0 = 0 */
"pxor %%mm7,%%mm7\n" /* mm7 holds the sum */
static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp;
- asm volatile (
+ __asm__ volatile (
"shr $1,%2\n"
"pxor %%xmm0,%%xmm0\n" /* mm0 = 0 */
"pxor %%xmm7,%%xmm7\n" /* mm7 holds the sum */
static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
int tmp;
- asm volatile (
+ __asm__ volatile (
"movl %3,%%ecx\n"
"pxor %%mm7,%%mm7\n"
"pxor %%mm6,%%mm6\n"
static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
int tmp;
uint8_t * pix= pix1;
- asm volatile (
+ __asm__ volatile (
"movl %3,%%ecx\n"
"pxor %%mm7,%%mm7\n"
"pxor %%mm6,%%mm6\n"
"paddw " #in0 ", %%mm6\n"
- asm volatile (
+ __asm__ volatile (
"movl %3,%%ecx\n"
"pxor %%mm6,%%mm6\n"
"pxor %%mm7,%%mm7\n"
"paddw " #in1 ", " #in0 "\n"\
"paddw " #in0 ", %%mm6\n"
- asm volatile (
+ __asm__ volatile (
"movl %3,%%ecx\n"
"pxor %%mm6,%%mm6\n"
"pxor %%mm7,%%mm7\n"
"paddw " #in0 ", %%mm6\n"
- asm volatile (
+ __asm__ volatile (
"movl %4,%%ecx\n"
"pxor %%mm6,%%mm6\n"
"pcmpeqw %%mm7,%%mm7\n"
"paddw " #in1 ", " #in0 "\n"\
"paddw " #in0 ", %%mm6\n"
- asm volatile (
+ __asm__ volatile (
"movl %4,%%ecx\n"
"pxor %%mm6,%%mm6\n"
"pcmpeqw %%mm7,%%mm7\n"
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
x86_reg i=0;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movq (%2, %0), %%mm0 \n\t"
"movq (%1, %0), %%mm1 \n\t"
x86_reg i=0;
uint8_t l, lt;
- asm volatile(
+ __asm__ volatile(
"1: \n\t"
"movq -1(%1, %0), %%mm0 \n\t" // LT
"movq (%1, %0), %%mm1 \n\t" // T
#define DIFF_PIXELS_8(m0,m1,mm,p1,p2,stride,temp) {\
uint8_t *p1b=p1, *p2b=p2;\
- asm volatile(\
+ __asm__ volatile(\
DIFF_PIXELS_1(m0, mm##0, mm##7, (%1), (%2))\
DIFF_PIXELS_1(m0, mm##1, mm##7, (%1,%3), (%2,%3))\
DIFF_PIXELS_1(m0, mm##2, mm##7, (%1,%3,2), (%2,%3,2))\
\
DIFF_PIXELS_4x8(src1, src2, stride, temp[0]);\
\
- asm volatile(\
+ __asm__ volatile(\
HADAMARD48\
\
"movq %%mm7, 96(%1) \n\t"\
\
DIFF_PIXELS_4x8(src1+4, src2+4, stride, temp[4]);\
\
- asm volatile(\
+ __asm__ volatile(\
HADAMARD48\
\
"movq %%mm7, 96(%1) \n\t"\
\
DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\
\
- asm volatile(\
+ __asm__ volatile(\
HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\
TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\
HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\
#define DCT_SAD_FUNC(cpu) \
static int sum_abs_dctelem_##cpu(DCTELEM *block){\
int sum;\
- asm volatile(\
+ __asm__ volatile(\
DCT_SAD\
:"=r"(sum)\
:"r"(block)\
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size){
int sum;
x86_reg i=size;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm4, %%mm4 \n"
"1: \n"
"sub $8, %0 \n"
static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{
- asm volatile(
+ __asm__ volatile(
#define FDCT_ROW_SSE2_H1(i,t) \
"movq " #i "(%0), %%xmm2 \n\t" \
"movq " #i "+8(%0), %%xmm0 \n\t" \
int n = 1<<s->nbits;
int i;
ff_fft_dispatch_interleave_3dn2(z, s->nbits);
- asm volatile("femms");
+ __asm__ volatile("femms");
if(n <= 8)
for(i=0; i<n; i+=2)
FFSWAP(FFTSample, z[i].im, z[i+1].re);
in1 = input;
in2 = input + n2 - 1;
#ifdef EMULATE_3DNOWEXT
- asm volatile("movd %0, %%mm7" ::"r"(1<<31));
+ __asm__ volatile("movd %0, %%mm7" ::"r"(1<<31));
#endif
for(k = 0; k < n4; k++) {
// FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm0 \n"
"movd %2, %%mm1 \n"
"punpckldq %1, %%mm0 \n"
::"m"(in2[-2*k]), "m"(in1[2*k]),
"m"(tcos[k]), "m"(tsin[k])
);
- asm volatile(
+ __asm__ volatile(
"movq %%mm0, %0 \n\t"
:"=m"(z[revtab[k]])
);
/* post rotation */
j = -n2;
k = n2-8;
- asm volatile(
+ __asm__ volatile(
"1: \n"
CMUL(%0, %%mm0, %%mm1)
CMUL(%1, %%mm2, %%mm3)
:"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8)
:"memory"
);
- asm volatile("femms");
+ __asm__ volatile("femms");
}
void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input)
j = -n;
k = n-8;
- asm volatile(
+ __asm__ volatile(
"movq %4, %%mm7 \n"
"1: \n"
PSWAPD((%2,%1), %%mm0)
:"r"(output+n4), "r"(output+n4*3),
"m"(*m1m1)
);
- asm volatile("femms");
+ __asm__ volatile("femms");
}
if(n <= 16) {
x86_reg i = -8*n;
- asm volatile(
+ __asm__ volatile(
"1: \n"
"movaps (%0,%1), %%xmm0 \n"
"movaps %%xmm0, %%xmm1 \n"
int n = 1 << s->nbits;
int i;
for(i=0; i<n; i+=2) {
- asm volatile(
+ __asm__ volatile(
"movaps %2, %%xmm0 \n"
"movlps %%xmm0, %0 \n"
"movhps %%xmm0, %1 \n"
/* pre rotation */
for(k=n8-2; k>=0; k-=2) {
- asm volatile(
+ __asm__ volatile(
"movaps (%2,%1,2), %%xmm0 \n" // { z[k].re, z[k].im, z[k+1].re, z[k+1].im }
"movaps -16(%2,%0,2), %%xmm1 \n" // { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im }
"movaps %%xmm0, %%xmm2 \n"
#ifdef ARCH_X86_64
// if we have enough regs, don't let gcc make the luts latency-bound
// but if not, latency is faster than spilling
- asm("movlps %%xmm0, %0 \n"
+ __asm__("movlps %%xmm0, %0 \n"
"movhps %%xmm0, %1 \n"
"movlps %%xmm1, %2 \n"
"movhps %%xmm1, %3 \n"
"=m"(z[revtab[ k+1]])
);
#else
- asm("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]]));
- asm("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]]));
- asm("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]]));
- asm("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]]));
+ __asm__("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]]));
+ __asm__("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]]));
+ __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]]));
+ __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]]));
#endif
}
j = -n2;
k = n2-16;
- asm volatile(
+ __asm__ volatile(
"1: \n"
CMUL(%0, %%xmm0, %%xmm1)
CMUL(%1, %%xmm4, %%xmm5)
j = -n;
k = n-16;
- asm volatile(
+ __asm__ volatile(
"movaps %4, %%xmm7 \n"
"1: \n"
"movaps (%2,%1), %%xmm0 \n"
int n2 = len>>1;
x86_reg i = -n2*sizeof(int32_t);
x86_reg j = n2*sizeof(int32_t);
- asm volatile(
+ __asm__ volatile(
"movsd %0, %%xmm7 \n\t"
"movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t"
"movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t"
::"m"(c)
);
#define WELCH(MOVPD, offset)\
- asm volatile(\
+ __asm__ volatile(\
"1: \n\t"\
"movapd %%xmm7, %%xmm1 \n\t"\
"mulpd %%xmm1, %%xmm1 \n\t"\
for(j=0; j<lag; j+=2){
x86_reg i = -len*sizeof(double);
if(j == lag-2) {
- asm volatile(
+ __asm__ volatile(
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
"movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t"
:"r"(data1+len), "r"(data1+len-j)
);
} else {
- asm volatile(
+ __asm__ volatile(
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
"1: \n\t"
int minusstart= -(int)significant_coeff_ctx_base;
int minusindex= 4-(int)index;
int coeff_count;
- asm volatile(
+ __asm__ volatile(
"movl "RANGE "(%3), %%esi \n\t"
"movl "LOW "(%3), %%ebx \n\t"
int minusindex= 4-(int)index;
int coeff_count;
x86_reg last=0;
- asm volatile(
+ __asm__ volatile(
"movl "RANGE "(%3), %%esi \n\t"
"movl "LOW "(%3), %%ebx \n\t"
static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
{
/* Load dct coeffs */
- asm volatile(
+ __asm__ volatile(
"movq (%0), %%mm0 \n\t"
"movq 8(%0), %%mm1 \n\t"
"movq 16(%0), %%mm2 \n\t"
"movq 24(%0), %%mm3 \n\t"
:: "r"(block) );
- asm volatile(
+ __asm__ volatile(
/* mm1=s02+s13 mm2=s02-s13 mm4=d02+d13 mm0=d02-d13 */
IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 )
"pxor %%mm7, %%mm7 \n\t"
:: "m"(ff_pw_32));
- asm volatile(
+ __asm__ volatile(
STORE_DIFF_4P( %%mm0, %%mm1, %%mm7)
"add %1, %0 \n\t"
STORE_DIFF_4P( %%mm2, %%mm1, %%mm7)
static inline void h264_idct8_1d(int16_t *block)
{
- asm volatile(
+ __asm__ volatile(
"movq 112(%0), %%mm7 \n\t"
"movq 80(%0), %%mm0 \n\t"
"movq 48(%0), %%mm3 \n\t"
h264_idct8_1d(block+4*i);
- asm volatile(
+ __asm__ volatile(
"movq %%mm7, %0 \n\t"
TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
"movq %%mm0, 8(%1) \n\t"
for(i=0; i<2; i++){
h264_idct8_1d(b2+4*i);
- asm volatile(
+ __asm__ volatile(
"psraw $6, %%mm7 \n\t"
"psraw $6, %%mm6 \n\t"
"psraw $6, %%mm5 \n\t"
static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
{
- asm volatile(
+ __asm__ volatile(
"movdqa 0x10(%1), %%xmm1 \n"
"movdqa 0x20(%1), %%xmm2 \n"
"movdqa 0x30(%1), %%xmm3 \n"
static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
{
int dc = (block[0] + 32) >> 6;
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm0 \n\t"
"pshufw $0, %%mm0, %%mm0 \n\t"
"pxor %%mm1, %%mm1 \n\t"
"packuswb %%mm1, %%mm1 \n\t"
::"r"(dc)
);
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm2 \n\t"
"movd %1, %%mm3 \n\t"
"movd %2, %%mm4 \n\t"
{
int dc = (block[0] + 32) >> 6;
int y;
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm0 \n\t"
"pshufw $0, %%mm0, %%mm0 \n\t"
"pxor %%mm1, %%mm1 \n\t"
::"r"(dc)
);
for(y=2; y--; dst += 4*stride){
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm2 \n\t"
"movq %1, %%mm3 \n\t"
"movq %2, %%mm4 \n\t"
{
DECLARE_ALIGNED_8(uint64_t, tmp0[2]);
- asm volatile(
+ __asm__ volatile(
"movq (%1,%3), %%mm0 \n\t" //p1
"movq (%1,%3,2), %%mm1 \n\t" //p0
"movq (%2), %%mm2 \n\t" //q0
static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
{
- asm volatile(
+ __asm__ volatile(
"movq (%0), %%mm0 \n\t" //p1
"movq (%0,%2), %%mm1 \n\t" //p0
"movq (%1), %%mm2 \n\t" //q0
static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1)
{
- asm volatile(
+ __asm__ volatile(
"movq (%0), %%mm0 \n\t"
"movq (%0,%2), %%mm1 \n\t"
"movq (%1), %%mm2 \n\t"
static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
int dir;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"movq %0, %%mm6 \n\t"
"movq %1, %%mm5 \n\t"
::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7)
);
if(field)
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm5 \n\t"
"movq %1, %%mm4 \n\t"
::"m"(ff_pb_3_1), "m"(ff_pb_7_3)
DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
int b_idx, edge, l;
for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
- asm volatile(
+ __asm__ volatile(
"pand %0, %%mm0 \n\t"
::"m"(mask_dir)
);
if(!(mask_mv & edge)) {
- asm volatile("pxor %%mm0, %%mm0 \n\t":);
+ __asm__ volatile("pxor %%mm0, %%mm0 \n\t":);
for( l = bidir; l >= 0; l-- ) {
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm1 \n\t"
"punpckldq %1, %%mm1 \n\t"
"movq %%mm1, %%mm2 \n\t"
);
}
}
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm1 \n\t"
"por %1, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
::"m"(nnz[b_idx]),
"m"(nnz[b_idx+d_idx])
);
- asm volatile(
+ __asm__ volatile(
"pcmpeqw %%mm7, %%mm0 \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t"
"psrlw $15, %%mm0 \n\t" // nonzero -> 1
edges = 4;
step = 1;
}
- asm volatile(
+ __asm__ volatile(
"movq (%0), %%mm0 \n\t"
"movq 8(%0), %%mm1 \n\t"
"movq 16(%0), %%mm2 \n\t"
static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=4;\
\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movq %5, %%mm4 \n\t"\
"movq %6, %%mm5 \n\t"\
}\
static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
int h=4;\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movq %0, %%mm4 \n\t"\
"movq %1, %%mm5 \n\t"\
:: "m"(ff_pw_5), "m"(ff_pw_16)\
);\
do{\
- asm volatile(\
+ __asm__ volatile(\
"movd -1(%0), %%mm1 \n\t"\
"movd (%0), %%mm2 \n\t"\
"movd 1(%0), %%mm3 \n\t"\
}\
static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
src -= 2*srcStride;\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
int w=3;\
src -= 2*srcStride+2;\
while(w--){\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
src += 4 - 9*srcStride;\
}\
tmp -= 3*4;\
- asm volatile(\
+ __asm__ volatile(\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"paddw 10(%0), %%mm0 \n\t"\
\
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=8;\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movq %5, %%mm6 \n\t"\
"1: \n\t"\
\
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
int h=8;\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movq %0, %%mm6 \n\t"\
:: "m"(ff_pw_5)\
);\
do{\
- asm volatile(\
+ __asm__ volatile(\
"movq (%0), %%mm0 \n\t"\
"movq 1(%0), %%mm2 \n\t"\
"movq %%mm0, %%mm1 \n\t"\
src -= 2*srcStride;\
\
while(w--){\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
: "memory"\
);\
if(h==16){\
- asm volatile(\
+ __asm__ volatile(\
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
int w = (size+8)>>2;\
src -= 2*srcStride+2;\
while(w--){\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
: "memory"\
);\
if(size==16){\
- asm volatile(\
+ __asm__ volatile(\
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\
QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\
int w = size>>4;\
do{\
int h = size;\
- asm volatile(\
+ __asm__ volatile(\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"movq 8(%0), %%mm3 \n\t"\
\
static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
{\
- asm volatile(\
+ __asm__ volatile(\
"movq (%1), %%mm0 \n\t"\
"movq 24(%1), %%mm1 \n\t"\
"psraw $5, %%mm0 \n\t"\
static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
{\
do{\
- asm volatile(\
+ __asm__ volatile(\
"movq (%1), %%mm0 \n\t"\
"movq 8(%1), %%mm1 \n\t"\
"movq 48(%1), %%mm2 \n\t"\
#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
int h=16;\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%xmm15, %%xmm15 \n\t"\
"movdqa %6, %%xmm14 \n\t"\
"movdqa %7, %%xmm13 \n\t"\
#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
int h=8;\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%xmm7, %%xmm7 \n\t"\
"movdqa %0, %%xmm6 \n\t"\
:: "m"(ff_pw_5)\
);\
do{\
- asm volatile(\
+ __asm__ volatile(\
"lddqu -5(%0), %%xmm1 \n\t"\
"movdqa %%xmm1, %%xmm0 \n\t"\
"punpckhbw %%xmm7, %%xmm1 \n\t"\
\
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=8;\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%xmm7, %%xmm7 \n\t"\
"movdqa %5, %%xmm6 \n\t"\
"1: \n\t"\
static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
src -= 2*srcStride;\
\
- asm volatile(\
+ __asm__ volatile(\
"pxor %%xmm7, %%xmm7 \n\t"\
"movq (%0), %%xmm0 \n\t"\
"add %2, %0 \n\t"\
: "memory"\
);\
if(h==16){\
- asm volatile(\
+ __asm__ volatile(\
QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
int w = (size+8)>>3;
src -= 2*srcStride+2;
while(w--){
- asm volatile(
+ __asm__ volatile(
"pxor %%xmm7, %%xmm7 \n\t"
"movq (%0), %%xmm0 \n\t"
"add %2, %0 \n\t"
: "memory"
);
if(size==16){
- asm volatile(
+ __asm__ volatile(
QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48)
QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48)
QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48)
static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
int h = size;\
if(size == 16){\
- asm volatile(\
+ __asm__ volatile(\
"1: \n\t"\
"movdqa 32(%0), %%xmm4 \n\t"\
"movdqa 16(%0), %%xmm5 \n\t"\
: "memory"\
);\
}else{\
- asm volatile(\
+ __asm__ volatile(\
"1: \n\t"\
"movdqa 16(%0), %%xmm1 \n\t"\
"movdqa (%0), %%xmm0 \n\t"\
int x, y;
offset <<= log2_denom;
offset += (1 << log2_denom) >> 1;
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm4 \n\t"
"movd %1, %%mm5 \n\t"
"movd %2, %%mm6 \n\t"
);
for(y=0; y<h; y+=2){
for(x=0; x<w; x+=4){
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm0 \n\t"
"movd %1, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
{
int x, y;
offset = ((offset + 1) | 1) << log2_denom;
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm3 \n\t"
"movd %1, %%mm4 \n\t"
"movd %2, %%mm5 \n\t"
);
for(y=0; y<h; y++){
for(x=0; x<w; x+=4){
- asm volatile(
+ __asm__ volatile(
"movd %0, %%mm0 \n\t"
"movd %1, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
void ff_idct_xvid_mmx(short *block){
-asm volatile(
+__asm__ volatile(
//# Process each row
DCT_8_INV_ROW_MMX(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
DCT_8_INV_ROW_MMX(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1))
void ff_idct_xvid_mmx2(short *block){
-asm volatile(
+__asm__ volatile(
//# Process each row
DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
DCT_8_INV_ROW_XMM(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1))
inline void ff_idct_xvid_sse2(short *block)
{
- asm volatile(
+ __asm__ volatile(
"movq "MANGLE(m127)", %%mm0 \n\t"
iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0))
iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1))
#ifdef FRAC_BITS
# define MULL(ra, rb) \
- ({ int rt, dummy; asm (\
+ ({ int rt, dummy; __asm__ (\
"imull %3 \n\t"\
"shrdl %4, %%edx, %%eax \n\t"\
: "=a"(rt), "=d"(dummy)\
#define MULH(ra, rb) \
({ int rt, dummy;\
- asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\
+ __asm__ ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\
rt; })
#define MUL64(ra, rb) \
({ int64_t rt;\
- asm ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\
+ __asm__ ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\
rt; })
#endif /* AVCODEC_I386_MATHOPS_H */
#define mmx_i2r(op,imm,reg) \
- asm volatile (#op " %0, %%" #reg \
+ __asm__ volatile (#op " %0, %%" #reg \
: /* nothing */ \
: "i" (imm) )
#define mmx_m2r(op,mem,reg) \
- asm volatile (#op " %0, %%" #reg \
+ __asm__ volatile (#op " %0, %%" #reg \
: /* nothing */ \
: "m" (mem))
#define mmx_r2m(op,reg,mem) \
- asm volatile (#op " %%" #reg ", %0" \
+ __asm__ volatile (#op " %%" #reg ", %0" \
: "=m" (mem) \
: /* nothing */ )
#define mmx_r2r(op,regs,regd) \
- asm volatile (#op " %" #regs ", %" #regd)
+ __asm__ volatile (#op " %" #regs ", %" #regd)
-#define emms() asm volatile ("emms")
+#define emms() __asm__ volatile ("emms")
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
#define mmx_m2ri(op,mem,reg,imm) \
- asm volatile (#op " %1, %0, %%" #reg \
+ __asm__ volatile (#op " %1, %0, %%" #reg \
: /* nothing */ \
: "m" (mem), "i" (imm))
#define mmx_r2ri(op,regs,regd,imm) \
- asm volatile (#op " %0, %%" #regs ", %%" #regd \
+ __asm__ volatile (#op " %0, %%" #regs ", %%" #regd \
: /* nothing */ \
: "i" (imm) )
#define mmx_fetch(mem,hint) \
- asm volatile ("prefetch" #hint " %0" \
+ __asm__ volatile ("prefetch" #hint " %0" \
: /* nothing */ \
: "m" (mem))
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
#define pmovmskb(mmreg,reg) \
- asm volatile ("movmskps %" #mmreg ", %" #reg)
+ __asm__ volatile ("movmskps %" #mmreg ", %" #reg)
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
-#define sfence() asm volatile ("sfence\n\t")
+#define sfence() __asm__ volatile ("sfence\n\t")
/* SSE2 */
#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
x86_reg len= -(stride*h);
- asm volatile(
+ __asm__ volatile(
ASMALIGN(4)
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- asm volatile(
+ __asm__ volatile(
ASMALIGN(4)
"1: \n\t"
"movq (%1), %%mm0 \n\t"
static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
{
int ret;
- asm volatile(
+ __asm__ volatile(
"pxor %%xmm6, %%xmm6 \n\t"
ASMALIGN(4)
"1: \n\t"
: "+r" (h), "+r" (blk1), "+r" (blk2)
: "r" ((x86_reg)stride)
);
- asm volatile(
+ __asm__ volatile(
"movhlps %%xmm6, %%xmm0 \n\t"
"paddw %%xmm0, %%xmm6 \n\t"
"movd %%xmm6, %0 \n\t"
static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- asm volatile(
+ __asm__ volatile(
ASMALIGN(4)
"1: \n\t"
"movq (%1), %%mm0 \n\t"
static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"movq (%1), %%mm0 \n\t"
"add %3, %1 \n\t"
ASMALIGN(4)
static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- asm volatile(
+ __asm__ volatile(
"movq "MANGLE(bone)", %%mm5 \n\t"
"movq (%1), %%mm0 \n\t"
"pavgb 1(%1), %%mm0 \n\t"
static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
{
x86_reg len= -(stride*h);
- asm volatile(
+ __asm__ volatile(
ASMALIGN(4)
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
x86_reg len= -(stride*h);
- asm volatile(
+ __asm__ volatile(
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
static inline int sum_mmx(void)
{
int ret;
- asm volatile(
+ __asm__ volatile(
"movq %%mm6, %%mm0 \n\t"
"psrlq $32, %%mm6 \n\t"
"paddw %%mm0, %%mm6 \n\t"
static inline int sum_mmx2(void)
{
int ret;
- asm volatile(
+ __asm__ volatile(
"movd %%mm6, %0 \n\t"
: "=r" (ret)
);
static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
sad8_1_ ## suf(blk1, blk2, stride, 8);\
static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
::);\
\
\
static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
sad8_1_ ## suf(blk1 , blk2 , stride, h);\
}\
static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
}\
static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
}\
static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
::);\
\
else
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd);
-asm volatile(
+__asm__ volatile(
"movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd);
-asm volatile(
+__asm__ volatile(
"movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
block0 = block[0] * s->c_dc_scale;
/* XXX: only mpeg1 */
quant_matrix = s->intra_matrix;
-asm volatile(
+__asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
quant_matrix = s->inter_matrix;
-asm volatile(
+__asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
else
block0 = block[0] * s->c_dc_scale;
quant_matrix = s->intra_matrix;
-asm volatile(
+__asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
quant_matrix = s->inter_matrix;
-asm volatile(
+__asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlq $48, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
s->dct_count[intra]++;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"1: \n\t"
"pxor %%mm0, %%mm0 \n\t"
s->dct_count[intra]++;
- asm volatile(
+ __asm__ volatile(
"pxor %%xmm7, %%xmm7 \n\t"
"1: \n\t"
"pxor %%xmm0, %%xmm0 \n\t"
/* note: block[0] is assumed to be positive */
if (!s->h263_aic) {
#if 1
- asm volatile (
+ __asm__ volatile (
"mul %%ecx \n\t"
: "=d" (level), "=a"(dummy)
: "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1])
);
#else
- asm volatile (
+ __asm__ volatile (
"xorl %%edx, %%edx \n\t"
"divw %%cx \n\t"
"movzwl %%ax, %%eax \n\t"
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
- asm volatile(
+ __asm__ volatile(
"movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1
SPREADW(MM"3")
"pxor "MM"7, "MM"7 \n\t" // 0
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
);
}else{ // FMT_H263
- asm volatile(
+ __asm__ volatile(
"movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1
SPREADW(MM"3")
"pxor "MM"7, "MM"7 \n\t" // 0
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
);
}
- asm volatile(
+ __asm__ volatile(
"movd %1, "MM"1 \n\t" // max_qcoeff
SPREADW(MM"1")
"psubusw "MM"1, "MM"4 \n\t"
DECLARE_ALIGNED(8, int64_t, align_tmp[16]);
int16_t * const temp= (int16_t*)align_tmp;
- asm volatile(
+ __asm__ volatile(
#if 0 //Alternative, simpler variant
#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
// calculate b[0] correctly afterwards.
i = 0;
- asm volatile(
+ __asm__ volatile(
"pcmpeqd %%xmm7, %%xmm7 \n\t"
"pcmpeqd %%xmm3, %%xmm3 \n\t"
"psllw $1, %%xmm3 \n\t"
"psllw $13, %%xmm3 \n\t"
::);
for(; i<w_l-15; i+=16){
- asm volatile(
+ __asm__ volatile(
"movdqu (%1), %%xmm1 \n\t"
"movdqu 16(%1), %%xmm5 \n\t"
"movdqu 2(%1), %%xmm2 \n\t"
dst[i] = dst[i] - (b[i] + b[i + 1]);
}
for(; i<w_r-15; i+=16){
- asm volatile(
+ __asm__ volatile(
"movdqu (%1), %%xmm1 \n\t"
"movdqu 16(%1), %%xmm5 \n\t"
"movdqu 2(%1), %%xmm2 \n\t"
IDWTELEM b_0 = b[0];
i = 0;
- asm volatile(
+ __asm__ volatile(
"psllw $15, %%xmm7 \n\t"
"pcmpeqw %%xmm6, %%xmm6 \n\t"
"psrlw $13, %%xmm6 \n\t"
"paddw %%xmm7, %%xmm6 \n\t"
::);
for(; i<w_l-15; i+=16){
- asm volatile(
+ __asm__ volatile(
"movdqu (%1), %%xmm0 \n\t"
"movdqu 16(%1), %%xmm4 \n\t"
"movdqu 2(%1), %%xmm1 \n\t"
temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS);
}
for(; i<w_r-7; i+=8){
- asm volatile(
+ __asm__ volatile(
"movdqu 2(%1), %%xmm2 \n\t"
"movdqu 18(%1), %%xmm6 \n\t"
"paddw (%1), %%xmm2 \n\t"
b[i] = b[i>>1];
}
for (i-=62; i>=0; i-=64){
- asm volatile(
+ __asm__ volatile(
"movdqa (%1), %%xmm0 \n\t"
"movdqa 16(%1), %%xmm2 \n\t"
"movdqa 32(%1), %%xmm4 \n\t"
i = 1;
b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
- asm volatile(
+ __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"pcmpeqw %%mm3, %%mm3 \n\t"
"psllw $1, %%mm3 \n\t"
"psllw $13, %%mm3 \n\t"
::);
for(; i<w_l-7; i+=8){
- asm volatile(
+ __asm__ volatile(
"movq (%1), %%mm2 \n\t"
"movq 8(%1), %%mm6 \n\t"
"paddw 2(%1), %%mm2 \n\t"
i = 0;
for(; i<w_r-7; i+=8){
- asm volatile(
+ __asm__ volatile(
"movq (%1), %%mm2 \n\t"
"movq 8(%1), %%mm6 \n\t"
"paddw 2(%1), %%mm2 \n\t"
i = 1;
b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS);
- asm volatile(
+ __asm__ volatile(
"psllw $15, %%mm7 \n\t"
"pcmpeqw %%mm6, %%mm6 \n\t"
"psrlw $13, %%mm6 \n\t"
"paddw %%mm7, %%mm6 \n\t"
::);
for(; i<w_l-7; i+=8){
- asm volatile(
+ __asm__ volatile(
"movq (%1), %%mm0 \n\t"
"movq 8(%1), %%mm4 \n\t"
"movq 2(%1), %%mm1 \n\t"
i = 0;
for(; i<w_r-7; i+=8){
- asm volatile(
+ __asm__ volatile(
"movq 2(%1), %%mm2 \n\t"
"movq 10(%1), %%mm6 \n\t"
"paddw (%1), %%mm2 \n\t"
b[i] = b[i>>1];
}
for (i-=30; i>=0; i-=32){
- asm volatile(
+ __asm__ volatile(
"movq (%1), %%mm0 \n\t"
"movq 8(%1), %%mm2 \n\t"
"movq 16(%1), %%mm4 \n\t"
}
i+=i;
- asm volatile (
+ __asm__ volatile (
"jmp 2f \n\t"
"1: \n\t"
snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6")
b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
}
i+=i;
- asm volatile(
+ __asm__ volatile(
"jmp 2f \n\t"
"1: \n\t"
#define snow_inner_add_yblock_sse2_header \
IDWTELEM * * dst_array = sb->line + src_y;\
x86_reg tmp;\
- asm volatile(\
+ __asm__ volatile(\
"mov %7, %%"REG_c" \n\t"\
"mov %6, %2 \n\t"\
"mov %4, %%"REG_S" \n\t"\
#define snow_inner_add_yblock_mmx_header \
IDWTELEM * * dst_array = sb->line + src_y;\
x86_reg tmp;\
- asm volatile(\
+ __asm__ volatile(\
"mov %7, %%"REG_c" \n\t"\
"mov %6, %2 \n\t"\
"mov %4, %%"REG_S" \n\t"\
const uint8_t *src, x86_reg stride,
int rnd, int64_t shift)
{
- asm volatile(
+ __asm__ volatile(
"mov $3, %%"REG_c" \n\t"
LOAD_ROUNDER_MMX("%5")
"movq "MANGLE(ff_pw_9)", %%mm6 \n\t"
src -= 1;
rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */
- asm volatile(
+ __asm__ volatile(
LOAD_ROUNDER_MMX("%4")
"movq "MANGLE(ff_pw_128)", %%mm6\n\t"
"movq "MANGLE(ff_pw_9)", %%mm5 \n\t"
x86_reg stride, int rnd, x86_reg offset)
{
rnd = 8-rnd;
- asm volatile(
+ __asm__ volatile(
"mov $8, %%"REG_c" \n\t"
LOAD_ROUNDER_MMX("%5")
"movq "MANGLE(ff_pw_9)", %%mm6\n\t"
{ \
int h = 8; \
src -= src_stride; \
- asm volatile( \
+ __asm__ volatile( \
LOAD_ROUNDER_MMX("%5") \
"movq "MANGLE(ff_pw_53)", %%mm5\n\t" \
"movq "MANGLE(ff_pw_18)", %%mm6\n\t" \
int h = 8; \
src -= 1; \
rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \
- asm volatile( \
+ __asm__ volatile( \
LOAD_ROUNDER_MMX("%4") \
"movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
"movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
int h = 8; \
src -= offset; \
rnd = 32-rnd; \
- asm volatile ( \
+ __asm__ volatile ( \
LOAD_ROUNDER_MMX("%6") \
"movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
"movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =
{ NULL, vc1_put_shift1_mmx, vc1_put_shift2_mmx, vc1_put_shift3_mmx };
- asm volatile(
+ __asm__ volatile(
"pxor %%mm0, %%mm0 \n\t"
::: "memory"
);
#define I(x) AV_STRINGIFY(16* x )"(%0)"
#define J(x) AV_STRINGIFY(16*(x-4) + 8)"(%0)"
- asm volatile (
+ __asm__ volatile (
RowIDCT()
Transpose()
#define O(x) I(x)
#define C(x) AV_STRINGIFY(16*(x-1))"(%1)"
- asm volatile (
+ __asm__ volatile (
VP3_1D_IDCT_SSE2(NOP, NOP)
TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%0))
fact they decided to store the quantized DC (which would lead
to problems if Q could vary !) */
#if (defined(ARCH_X86)) && !defined PIC
- asm volatile(
+ __asm__ volatile(
"movl %3, %%eax \n\t"
"shrl $1, %%eax \n\t"
"addl %%eax, %2 \n\t"
#elif defined(RUNTIME_CPUDETECT)
int proc_ver;
// Support of mfspr PVR emulation added in Linux 2.6.17.
- asm volatile("mfspr %0, 287" : "=r" (proc_ver));
+ __asm__ volatile("mfspr %0, 287" : "=r" (proc_ver));
proc_ver >>= 16;
if (proc_ver & 0x8000 ||
proc_ver == 0x000c ||
i += 16;
}
for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
- asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
+ __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
}
if (misal) {
((unsigned long*)blocks)[188] = 0L;
}
else
for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
- asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
+ __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
}
#else
memset(blocks, 0, sizeof(DCTELEM)*6*64);
/* below the constraint "b" seems to mean "Address base register"
in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
- asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
+ __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
for (i = 0; i < 1024 ; i ++) {
if (fakedata[i] == (char)0)
{
register const uint8_t *p = mem;
do {
- asm volatile ("dcbt 0,%0" : : "r" (p));
+ __asm__ volatile ("dcbt 0,%0" : : "r" (p));
p+= stride;
} while(--h);
}
#ifndef HAVE_PPC64
#define POWERP_PMC_DATATYPE unsigned long
-#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a))
-#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
+#define POWERPC_GET_PMC1(a) __asm__ volatile("mfspr %0, 937" : "=r" (a))
+#define POWERPC_GET_PMC2(a) __asm__ volatile("mfspr %0, 938" : "=r" (a))
#if (POWERPC_NUM_PMC_ENABLED > 2)
-#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
-#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a))
+#define POWERPC_GET_PMC3(a) __asm__ volatile("mfspr %0, 941" : "=r" (a))
+#define POWERPC_GET_PMC4(a) __asm__ volatile("mfspr %0, 942" : "=r" (a))
#else
#define POWERPC_GET_PMC3(a) do {} while (0)
#define POWERPC_GET_PMC4(a) do {} while (0)
#endif
#if (POWERPC_NUM_PMC_ENABLED > 4)
-#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a))
-#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a))
+#define POWERPC_GET_PMC5(a) __asm__ volatile("mfspr %0, 929" : "=r" (a))
+#define POWERPC_GET_PMC6(a) __asm__ volatile("mfspr %0, 930" : "=r" (a))
#else
#define POWERPC_GET_PMC5(a) do {} while (0)
#define POWERPC_GET_PMC6(a) do {} while (0)
#endif
#else /* HAVE_PPC64 */
#define POWERP_PMC_DATATYPE unsigned long long
-#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 771" : "=r" (a))
-#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 772" : "=r" (a))
+#define POWERPC_GET_PMC1(a) __asm__ volatile("mfspr %0, 771" : "=r" (a))
+#define POWERPC_GET_PMC2(a) __asm__ volatile("mfspr %0, 772" : "=r" (a))
#if (POWERPC_NUM_PMC_ENABLED > 2)
-#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 773" : "=r" (a))
-#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 774" : "=r" (a))
+#define POWERPC_GET_PMC3(a) __asm__ volatile("mfspr %0, 773" : "=r" (a))
+#define POWERPC_GET_PMC4(a) __asm__ volatile("mfspr %0, 774" : "=r" (a))
#else
#define POWERPC_GET_PMC3(a) do {} while (0)
#define POWERPC_GET_PMC4(a) do {} while (0)
#endif
#if (POWERPC_NUM_PMC_ENABLED > 4)
-#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 775" : "=r" (a))
-#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 776" : "=r" (a))
+#define POWERPC_GET_PMC5(a) __asm__ volatile("mfspr %0, 775" : "=r" (a))
+#define POWERPC_GET_PMC6(a) __asm__ volatile("mfspr %0, 776" : "=r" (a))
#else
#define POWERPC_GET_PMC5(a) do {} while (0)
#define POWERPC_GET_PMC6(a) do {} while (0)
#if (__GNUC__ < 4)
# define REG_v(a)
#else
-# define REG_v(a) asm ( #a )
+# define REG_v(a) __asm__ ( #a )
#endif
#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
#if defined(ARCH_POWERPC_405)
/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
- asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+ __asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
/* signed 16x16 -> 32 multiply */
#define MUL16(ra, rb) \
({ int __rt; \
- asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
+ __asm__ ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
__rt; })
#endif
static void clear_blocks_mmi(DCTELEM * blocks)
{
- asm volatile(
+ __asm__ volatile(
".set noreorder \n"
"addiu $9, %0, 768 \n"
"nop \n"
static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
{
- asm volatile(
+ __asm__ volatile(
".set push \n\t"
".set mips3 \n\t"
"ld $8, 0(%0) \n\t"
static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile(
+ __asm__ volatile(
".set push \n\t"
".set mips3 \n\t"
"1: \n\t"
static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
- asm volatile (
+ __asm__ volatile (
".set push \n\t"
".set mips3 \n\t"
"1: \n\t"
pmaxh($2, $0, $2); \
ppacb($0, $2, $2); \
sd3(2, 0, 4); \
- asm volatile ("add $4, $5, $4");
+ __asm__ volatile ("add $4, $5, $4");
#define DCT_8_INV_COL8_PUT() \
PUT($16); \
pmaxh($2, $0, $2); \
ppacb($0, $2, $2); \
sd3(2, 0, 4); \
- asm volatile ("add $4, $5, $4");
+ __asm__ volatile ("add $4, $5, $4");
/*fixme: schedule*/
#define DCT_8_INV_COL8_ADD() \
void ff_mmi_idct(int16_t * block)
{
/* $4 = block */
- asm volatile("la $24, %0"::"m"(consttable[0]));
+ __asm__ volatile("la $24, %0"::"m"(consttable[0]));
lq($24, ROUNDER_0, $8);
lq($24, ROUNDER_1, $7);
DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
DCT_8_INV_COL8_STORE($4);
//let savedtemp regs be saved
- asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
+ __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
}
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
/* $4 = dest, $5 = line_size, $6 = block */
- asm volatile("la $24, %0"::"m"(consttable[0]));
+ __asm__ volatile("la $24, %0"::"m"(consttable[0]));
lq($24, ROUNDER_0, $8);
lq($24, ROUNDER_1, $7);
DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
DCT_8_INV_COL8_PUT();
//let savedtemp regs be saved
- asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
+ __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
}
void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
/* $4 = dest, $5 = line_size, $6 = block */
- asm volatile("la $24, %0"::"m"(consttable[0]));
+ __asm__ volatile("la $24, %0"::"m"(consttable[0]));
lq($24, ROUNDER_0, $8);
lq($24, ROUNDER_1, $7);
DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
DCT_8_INV_COL8_ADD();
//let savedtemp regs be saved
- asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
+ __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
}
#define lq(base, off, reg) \
- asm volatile ("lq " #reg ", %0("#base ")" : : "i" (off) )
+ __asm__ volatile ("lq " #reg ", %0("#base ")" : : "i" (off) )
#define lq2(mem, reg) \
- asm volatile ("lq " #reg ", %0" : : "r" (mem))
+ __asm__ volatile ("lq " #reg ", %0" : : "r" (mem))
#define sq(reg, off, base) \
- asm volatile ("sq " #reg ", %0("#base ")" : : "i" (off) )
+ __asm__ volatile ("sq " #reg ", %0("#base ")" : : "i" (off) )
/*
#define ld(base, off, reg) \
- asm volatile ("ld " #reg ", " #off "("#base ")")
+ __asm__ volatile ("ld " #reg ", " #off "("#base ")")
*/
#define ld3(base, off, reg) \
- asm volatile (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off)))
+ __asm__ volatile (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off)))
#define ldr3(base, off, reg) \
- asm volatile (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off)))
+ __asm__ volatile (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off)))
#define ldl3(base, off, reg) \
- asm volatile (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off)))
+ __asm__ volatile (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off)))
/*
#define sd(reg, off, base) \
- asm volatile ("sd " #reg ", " #off "("#base ")")
+ __asm__ volatile ("sd " #reg ", " #off "("#base ")")
*/
//seems assembler has bug encoding mnemonic 'sd', so DIY
#define sd3(reg, off, base) \
- asm volatile (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off)))
+ __asm__ volatile (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off)))
#define sw(reg, off, base) \
- asm volatile ("sw " #reg ", " #off "("#base ")")
+ __asm__ volatile ("sw " #reg ", " #off "("#base ")")
#define sq2(reg, mem) \
- asm volatile ("sq " #reg ", %0" : : "m" (*(mem)))
+ __asm__ volatile ("sq " #reg ", %0" : : "m" (*(mem)))
#define pinth(rs, rt, rd) \
- asm volatile ("pinth " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pinth " #rd ", " #rs ", " #rt )
#define phmadh(rs, rt, rd) \
- asm volatile ("phmadh " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("phmadh " #rd ", " #rs ", " #rt )
#define pcpyud(rs, rt, rd) \
- asm volatile ("pcpyud " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pcpyud " #rd ", " #rs ", " #rt )
#define pcpyld(rs, rt, rd) \
- asm volatile ("pcpyld " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pcpyld " #rd ", " #rs ", " #rt )
#define pcpyh(rt, rd) \
- asm volatile ("pcpyh " #rd ", " #rt )
+ __asm__ volatile ("pcpyh " #rd ", " #rt )
#define paddw(rs, rt, rd) \
- asm volatile ("paddw " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("paddw " #rd ", " #rs ", " #rt )
#define pextlw(rs, rt, rd) \
- asm volatile ("pextlw " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pextlw " #rd ", " #rs ", " #rt )
#define pextuw(rs, rt, rd) \
- asm volatile ("pextuw " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pextuw " #rd ", " #rs ", " #rt )
#define pextlh(rs, rt, rd) \
- asm volatile ("pextlh " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pextlh " #rd ", " #rs ", " #rt )
#define pextuh(rs, rt, rd) \
- asm volatile ("pextuh " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pextuh " #rd ", " #rs ", " #rt )
#define psubw(rs, rt, rd) \
- asm volatile ("psubw " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("psubw " #rd ", " #rs ", " #rt )
#define psraw(rt, sa, rd) \
- asm volatile ("psraw " #rd ", " #rt ", %0" : : "i"(sa) )
+ __asm__ volatile ("psraw " #rd ", " #rt ", %0" : : "i"(sa) )
#define ppach(rs, rt, rd) \
- asm volatile ("ppach " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("ppach " #rd ", " #rs ", " #rt )
#define ppacb(rs, rt, rd) \
- asm volatile ("ppacb " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("ppacb " #rd ", " #rs ", " #rt )
#define prevh(rt, rd) \
- asm volatile ("prevh " #rd ", " #rt )
+ __asm__ volatile ("prevh " #rd ", " #rt )
#define pmulth(rs, rt, rd) \
- asm volatile ("pmulth " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pmulth " #rd ", " #rs ", " #rt )
#define pmaxh(rs, rt, rd) \
- asm volatile ("pmaxh " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pmaxh " #rd ", " #rs ", " #rt )
#define pminh(rs, rt, rd) \
- asm volatile ("pminh " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pminh " #rd ", " #rs ", " #rt )
#define pinteh(rs, rt, rd) \
- asm volatile ("pinteh " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pinteh " #rd ", " #rs ", " #rt )
#define paddh(rs, rt, rd) \
- asm volatile ("paddh " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("paddh " #rd ", " #rs ", " #rt )
#define psubh(rs, rt, rd) \
- asm volatile ("psubh " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("psubh " #rd ", " #rs ", " #rt )
#define psrah(rt, sa, rd) \
- asm volatile ("psrah " #rd ", " #rt ", %0" : : "i"(sa) )
+ __asm__ volatile ("psrah " #rd ", " #rt ", %0" : : "i"(sa) )
#define pmfhl_uw(rd) \
- asm volatile ("pmfhl.uw " #rd)
+ __asm__ volatile ("pmfhl.uw " #rd)
#define pextlb(rs, rt, rd) \
- asm volatile ("pextlb " #rd ", " #rs ", " #rt )
+ __asm__ volatile ("pextlb " #rd ", " #rs ", " #rt )
#endif /* AVCODEC_PS2_MMI_H */
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
}
- asm volatile(
+ __asm__ volatile(
"add $14, $0, %3 \n\t"
"pcpyld $8, %0, %0 \n\t"
"pcpyh $8, $8 \n\t" //r8 = qmul
#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
(char*)dst+=size;
size/=8*4;
- asm(
+ __asm__(
#if defined(__SH4__)
" fschg\n" //single float mode
#endif
#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
#define load_matrix(table) \
- asm volatile( \
+ __asm__ volatile( \
" fschg\n" \
" fmov @%0+,xd0\n" \
" fmov @%0+,xd2\n" \
)
#define ftrv() \
- asm volatile("ftrv xmtrx,fv0" \
+ __asm__ volatile("ftrv xmtrx,fv0" \
: "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
: "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );
#define DEFREG \
- register float fr0 asm("fr0"); \
- register float fr1 asm("fr1"); \
- register float fr2 asm("fr2"); \
- register float fr3 asm("fr3")
+ register float fr0 __asm__("fr0"); \
+ register float fr1 __asm__("fr1"); \
+ register float fr2 __asm__("fr2"); \
+ register float fr3 __asm__("fr3")
#else
/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
- asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+ __asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
- asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));
+ __asm__ ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));
#else
int out1, out2, out3, out4;
DECLARE_ALIGNED_8(int16_t, temp[8*8]);
- asm volatile(
+ __asm__ volatile(
INIT_IDCT
#define ADDROUNDER
int out1, out2, out3, out4, out5;
int r1, r2, r3, r4, r5, r6, r7;
- asm volatile(
+ __asm__ volatile(
"wr %%g0, 0x8, %%gsr \n\t"
INIT_IDCT
int out1, out2, out3, out4, out5, out6;
int r1, r2, r3, r4, r5, r6, r7;
- asm volatile(
+ __asm__ volatile(
"wr %%g0, 0x8, %%gsr \n\t"
INIT_IDCT
#define vis_rd_d(X) (vis_dreg(X) << 25)
#define vis_ss2s(opf,rs1,rs2,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_s(rs1) | \
vis_rs2_s(rs2) | \
vis_rd_s(rd)))
#define vis_dd2d(opf,rs1,rs2,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_d(rs1) | \
vis_rs2_d(rs2) | \
vis_rd_d(rd)))
#define vis_ss2d(opf,rs1,rs2,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_s(rs1) | \
vis_rs2_s(rs2) | \
vis_rd_d(rd)))
#define vis_sd2d(opf,rs1,rs2,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_s(rs1) | \
vis_rs2_d(rs2) | \
vis_rd_d(rd)))
#define vis_d2s(opf,rs2,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs2_d(rs2) | \
vis_rd_s(rd)))
#define vis_s2d(opf,rs2,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs2_s(rs2) | \
vis_rd_d(rd)))
#define vis_d12d(opf,rs1,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_d(rs1) | \
vis_rd_d(rd)))
#define vis_d22d(opf,rs2,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs2_d(rs2) | \
vis_rd_d(rd)))
#define vis_s12s(opf,rs1,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_s(rs1) | \
vis_rd_s(rd)))
#define vis_s22s(opf,rs2,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs2_s(rs2) | \
vis_rd_s(rd)))
#define vis_s(opf,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rd_s(rd)))
#define vis_d(opf,rd) \
- asm volatile (".word %0" \
+ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \
vis_rd_d(rd)))
#define vis_r2m(op,rd,mem) \
- asm volatile (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) )
+ __asm__ volatile (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) )
#define vis_r2m_2(op,rd,mem1,mem2) \
- asm volatile (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) )
+ __asm__ volatile (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) )
#define vis_m2r(op,mem,rd) \
- asm volatile (#op "\t[%0], %%f" #rd : : "r" (&(mem)) )
+ __asm__ volatile (#op "\t[%0], %%f" #rd : : "r" (&(mem)) )
#define vis_m2r_2(op,mem1,mem2,rd) \
- asm volatile (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) )
+ __asm__ volatile (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) )
static inline void vis_set_gsr(unsigned int _val)
{
- register unsigned int val asm("g1");
+ register unsigned int val __asm__("g1");
val = _val;
- asm volatile(".word 0xa7804000"
+ __asm__ volatile(".word 0xa7804000"
: : "r" (val));
}
#define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2)
#define vis_ldblk(mem, rd) \
-do { register void *__mem asm("g1"); \
+do { register void *__mem __asm__("g1"); \
__mem = &(mem); \
- asm volatile(".word 0xc1985e00 | %1" \
+ __asm__ volatile(".word 0xc1985e00 | %1" \
: \
: "r" (__mem), \
"i" (vis_rd_d(rd)) \
} while (0)
#define vis_stblk(rd, mem) \
-do { register void *__mem asm("g1"); \
+do { register void *__mem __asm__("g1"); \
__mem = &(mem); \
- asm volatile(".word 0xc1b85e00 | %1" \
+ __asm__ volatile(".word 0xc1b85e00 | %1" \
: \
: "r" (__mem), \
"i" (vis_rd_d(rd)) \
} while (0)
#define vis_membar_storestore() \
- asm volatile(".word 0x8143e008" : : : "memory")
+ __asm__ volatile(".word 0x8143e008" : : : "memory")
#define vis_membar_sync() \
- asm volatile(".word 0x8143e040" : : : "memory")
+ __asm__ volatile(".word 0x8143e040" : : : "memory")
/* 16 and 32 bit partitioned addition and subtraction. The normal
* versions perform 4 16-bit or 2 32-bit additions or subtractions.
static inline void *vis_alignaddr(void *_ptr)
{
- register void *ptr asm("g1");
+ register void *ptr __asm__("g1");
ptr = _ptr;
- asm volatile(".word %2"
+ __asm__ volatile(".word %2"
: "=&r" (ptr)
: "0" (ptr),
"i" (vis_opc_base | vis_opf(0x18) |
static inline void vis_alignaddr_g0(void *_ptr)
{
- register void *ptr asm("g1");
+ register void *ptr __asm__("g1");
ptr = _ptr;
- asm volatile(".word %2"
+ __asm__ volatile(".word %2"
: "=&r" (ptr)
: "0" (ptr),
"i" (vis_opc_base | vis_opf(0x18) |
static inline void *vis_alignaddrl(void *_ptr)
{
- register void *ptr asm("g1");
+ register void *ptr __asm__("g1");
ptr = _ptr;
- asm volatile(".word %2"
+ __asm__ volatile(".word %2"
: "=&r" (ptr)
: "0" (ptr),
"i" (vis_opc_base | vis_opf(0x19) |
static inline void vis_alignaddrl_g0(void *_ptr)
{
- register void *ptr asm("g1");
+ register void *ptr __asm__("g1");
ptr = _ptr;
- asm volatile(".word %2"
+ __asm__ volatile(".word %2"
: "=&r" (ptr)
: "0" (ptr),
"i" (vis_opc_base | vis_opf(0x19) |
static av_always_inline av_const uint16_t bswap_16(uint16_t x)
{
#if defined(ARCH_X86)
- asm("rorw $8, %0" : "+r"(x));
+ __asm__("rorw $8, %0" : "+r"(x));
#elif defined(ARCH_SH4)
- asm("swap.b %0,%0" : "=r"(x) : "0"(x));
+ __asm__("swap.b %0,%0" : "=r"(x) : "0"(x));
#elif defined(HAVE_ARMV6)
- asm("rev16 %0, %0" : "+r"(x));
+ __asm__("rev16 %0, %0" : "+r"(x));
#else
x= (x>>8) | (x<<8);
#endif
{
#if defined(ARCH_X86)
#ifdef HAVE_BSWAP
- asm("bswap %0" : "+r" (x));
+ __asm__("bswap %0" : "+r" (x));
#else
- asm("rorw $8, %w0 \n\t"
+ __asm__("rorw $8, %w0 \n\t"
"rorl $16, %0 \n\t"
"rorw $8, %w0"
: "+r"(x));
#endif
#elif defined(ARCH_SH4)
- asm("swap.b %0,%0\n"
+ __asm__("swap.b %0,%0\n"
"swap.w %0,%0\n"
"swap.b %0,%0\n"
: "=r"(x) : "0"(x));
#elif defined(HAVE_ARMV6)
- asm("rev %0, %0" : "+r"(x));
+ __asm__("rev %0, %0" : "+r"(x));
#elif defined(ARCH_ARMV4L)
uint32_t t;
- asm ("eor %1, %0, %0, ror #16 \n\t"
+ __asm__ ("eor %1, %0, %0, ror #16 \n\t"
"bic %1, %1, #0xFF0000 \n\t"
"mov %0, %0, ror #8 \n\t"
"eor %0, %0, %1, lsr #8 \n\t"
: "+r"(x), "+r"(t));
#elif defined(ARCH_BFIN)
unsigned tmp;
- asm("%1 = %0 >> 8 (V); \n\t"
+ __asm__("%1 = %0 >> 8 (V); \n\t"
"%0 = %0 << 8 (V); \n\t"
"%0 = %0 | %1; \n\t"
"%0 = PACK(%0.L, %0.H); \n\t"
x= ((x<<16)&0xFFFF0000FFFF0000ULL) | ((x>>16)&0x0000FFFF0000FFFFULL);
return (x>>32) | (x<<32);
#elif defined(ARCH_X86_64)
- asm("bswap %0": "=r" (x) : "0" (x));
+ __asm__("bswap %0": "=r" (x) : "0" (x));
return x;
#else
union {
{
#ifdef HAVE_CMOV
int i=b;
- asm volatile(
+ __asm__ volatile(
"cmp %2, %1 \n\t"
"cmovg %1, %0 \n\t"
"cmovg %2, %1 \n\t"
static inline uint64_t read_time(void)
{
uint64_t a, d;
- asm volatile("rdtsc\n\t"
+ __asm__ volatile("rdtsc\n\t"
: "=a" (a), "=d" (d));
return (d << 32) | (a & 0xffffffff);
}
static inline long long read_time(void)
{
long long l;
- asm volatile("rdtsc\n\t"
+ __asm__ volatile("rdtsc\n\t"
: "=A" (l));
return l;
}
} p;
unsigned long long c;
} t;
- asm volatile ("%0=cycles; %1=cycles2;" : "=d" (t.p.lo), "=d" (t.p.hi));
+ __asm__ volatile ("%0=cycles; %1=cycles2;" : "=d" (t.p.lo), "=d" (t.p.hi));
return t.c;
}
#else //FIXME check ppc64
uint32_t tbu, tbl, temp;
/* from section 2.2.1 of the 32-bit PowerPC PEM */
- asm volatile(
+ __asm__ volatile(
"1:\n"
"mftbu %2\n"
"mftb %0\n"
# define FASTDIV(a,b) \
({\
int ret,dmy;\
- asm volatile(\
+ __asm__ volatile(\
"mull %3"\
:"=d"(ret),"=a"(dmy)\
:"1"(a),"g"(ff_inverse[b])\
static inline av_const int FASTDIV(int a, int b)
{
int r;
- asm volatile("cmp %2, #0 \n\t"
+ __asm__ volatile("cmp %2, #0 \n\t"
"smmul %0, %1, %2 \n\t"
"rsblt %0, %0, #0 \n\t"
: "=r"(r) : "r"(a), "r"(ff_inverse[b]));
# define FASTDIV(a,b) \
({\
int ret,dmy;\
- asm volatile(\
+ __asm__ volatile(\
"umull %1, %0, %2, %3"\
:"=&r"(ret),"=&r"(dmy)\
:"r"(a),"r"(ff_inverse[b])\
#if defined(ARCH_X86)
#define MASK_ABS(mask, level)\
- asm volatile(\
+ __asm__ volatile(\
"cltd \n\t"\
"xorl %1, %0 \n\t"\
"subl %1, %0 \n\t"\
#ifdef HAVE_CMOV
#define COPY3_IF_LT(x,y,a,b,c,d)\
-asm volatile (\
+__asm__ volatile (\
"cmpl %0, %3 \n\t"\
"cmovl %3, %0 \n\t"\
"cmovl %4, %1 \n\t"\
#if defined(ARCH_X86)
static inline void prefetchnta(void *p)
{
- asm volatile( "prefetchnta (%0)\n\t"
+ __asm__ volatile( "prefetchnta (%0)\n\t"
: : "r" (p)
);
}
static inline void prefetcht0(void *p)
{
- asm volatile( "prefetcht0 (%0)\n\t"
+ __asm__ volatile( "prefetcht0 (%0)\n\t"
: : "r" (p)
);
}
static inline void prefetcht1(void *p)
{
- asm volatile( "prefetcht1 (%0)\n\t"
+ __asm__ volatile( "prefetcht1 (%0)\n\t"
: : "r" (p)
);
}
static inline void prefetcht2(void *p)
{
- asm volatile( "prefetcht2 (%0)\n\t"
+ __asm__ volatile( "prefetcht2 (%0)\n\t"
: : "r" (p)
);
}
static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
int numEq= 0, dcOk;
src+= stride*4; // src points to begin of the 8x8 Block
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t"
: : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
);
- asm volatile(
+ __asm__ volatile(
"lea (%2, %3), %%"REG_a" \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
- asm volatile( //"movv %0 %1 %2\n\t"
+ __asm__ volatile( //"movv %0 %1 %2\n\t"
"movq %2, %%mm0 \n\t" // QP,..., QP
"pxor %%mm4, %%mm4 \n\t"
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
// FIXME rounding
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" // 0
"movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
"leal (%0, %1), %%"REG_a" \n\t"
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" // 0
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
memcpy(tmp+8, src+stride*8, 8);
*/
src+= stride*4;
- asm volatile(
+ __asm__ volatile(
#if 0 //slightly more accurate and slightly slower
"pxor %%mm7, %%mm7 \n\t" // 0
*/
#elif defined (HAVE_MMX)
src+= stride*4;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars
"and "ALIGN_MASK", %%"REG_c" \n\t" // align
static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
- asm volatile(
+ __asm__ volatile(
"pxor %%mm6, %%mm6 \n\t"
"pcmpeqb %%mm7, %%mm7 \n\t"
"movq %2, %%mm0 \n\t"
f= (f + 8)>>4;
#ifdef DEBUG_DERING_THRESHOLD
- asm volatile("emms\n\t":);
+ __asm__ volatile("emms\n\t":);
{
static long long numPixels=0;
if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= 4*stride;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
// 0 1 2 3 4 5 6 7 8 9
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
"lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*4;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
"pxor %%mm7, %%mm7 \n\t"
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*4;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
"pxor %%mm7, %%mm7 \n\t"
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= 4*stride;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
// 0 1 2 3 4 5 6 7 8 9
#ifdef HAVE_MMX
src+= 4*stride;
#ifdef HAVE_MMX2
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
// 0 1 2 3 4 5 6 7 8 9
);
#else // MMX without MMX2
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
// 0 1 2 3 4 5 6 7 8 9
*/
static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
{
- asm(
+ __asm__(
"lea (%0, %1), %%"REG_a" \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
*/
static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
{
- asm(
+ __asm__(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a",%1,4), %%"REG_d" \n\t"
// 0 1 2 3 4 5 6 7 8 9
#define FAST_L2_DIFF
//#define L1_DIFF //u should change the thresholds too if u try that one
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
- asm volatile(
+ __asm__ volatile(
"lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
"lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
"lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride
int64_t sums[10*8*2];
src+= step*3; // src points to begin of the 8x8 Block
//START_TIMER
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t"
: : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
);
- asm volatile(
+ __asm__ volatile(
"lea (%2, %3), %%"REG_a" \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
long offset= -8*step;
int64_t *temp_sums= sums;
- asm volatile(
+ __asm__ volatile(
"movq %2, %%mm0 \n\t" // QP,..., QP
"pxor %%mm4, %%mm4 \n\t"
src+= step; // src points to begin of the 8x8 Block
- asm volatile(
+ __asm__ volatile(
"movq %4, %%mm6 \n\t"
"pcmpeqb %%mm5, %%mm5 \n\t"
"pxor %%mm6, %%mm5 \n\t"
if(eq_mask != -1LL){
uint8_t *temp_src= src;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars
"and "ALIGN_MASK", %%"REG_c" \n\t" // align
#endif
if(levelFix){
#ifdef HAVE_MMX
- asm volatile(
+ __asm__ volatile(
"movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
"movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale
"lea (%2,%4), %%"REG_a" \n\t"
#endif //HAVE_MMX
}else{
#ifdef HAVE_MMX
- asm volatile(
+ __asm__ volatile(
"lea (%0,%2), %%"REG_a" \n\t"
"lea (%1,%3), %%"REG_d" \n\t"
static inline void RENAME(duplicate)(uint8_t src[], int stride)
{
#ifdef HAVE_MMX
- asm volatile(
+ __asm__ volatile(
"movq (%0), %%mm0 \n\t"
"add %1, %0 \n\t"
"movq %%mm0, (%0) \n\t"
prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
*/
- asm(
+ __asm__(
"mov %4, %%"REG_a" \n\t"
"shr $2, %%"REG_a" \n\t"
"and $6, %%"REG_a" \n\t"
}
c.QP= QP;
#ifdef HAVE_MMX
- asm volatile(
+ __asm__ volatile(
"movd %1, %%mm7 \n\t"
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
"packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
*/
- asm(
+ __asm__(
"mov %4, %%"REG_a" \n\t"
"shr $2, %%"REG_a" \n\t"
"and $6, %%"REG_a" \n\t"
}*/
}
#ifdef HAVE_3DNOW
- asm volatile("femms");
+ __asm__ volatile("femms");
#elif defined (HAVE_MMX)
- asm volatile("emms");
+ __asm__ volatile("emms");
#endif
#ifdef DEBUG_BRIGHTNESS