vassert(cond != Acc_ALWAYS);
return i;
}
-AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst ) {
- AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
- i->tag = Ain_MovZLQ;
- i->Ain.MovZLQ.src = src;
- i->Ain.MovZLQ.dst = dst;
+AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_MovxLQ;
+ i->Ain.MovxLQ.syned = syned;
+ i->Ain.MovxLQ.src = src;
+ i->Ain.MovxLQ.dst = dst;
return i;
}
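+/* Sketch of the intended use of AMD64Instr_MovxLQ (hypothetical, for
+   illustration only): the instruction selector would pick it for the
+   IR widening ops, along the lines of
+      addInstr(env, AMD64Instr_MovxLQ(True,  rSrc, rDst));   for Iop_32Sto64
+      addInstr(env, AMD64Instr_MovxLQ(False, rSrc, rDst));   for Iop_32Uto64
+   where rSrc and rDst are virtual registers chosen by the selector. */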
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
vex_printf(",");
ppHRegAMD64(i->Ain.CMov64.dst);
return;
- case Ain_MovZLQ:
- vex_printf("movzlq ");
- ppHRegAMD64_lo32(i->Ain.MovZLQ.src);
+ case Ain_MovxLQ:
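+      /* Prints e.g. "movslq %eax,%rdi" when syned, else
+         "movzlq %eax,%rdi" (register names here are illustrative). */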
+ vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
+ ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
vex_printf(",");
- ppHRegAMD64(i->Ain.MovZLQ.dst);
+ ppHRegAMD64(i->Ain.MovxLQ.dst);
return;
case Ain_LoadEX:
if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
return;
- case Ain_MovZLQ:
- addHRegUse(u, HRmRead, i->Ain.MovZLQ.src);
- addHRegUse(u, HRmWrite, i->Ain.MovZLQ.dst);
+ case Ain_MovxLQ:
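+      /* Both forms read src and define all 64 bits of dst, hence
+         HRmWrite rather than HRmModify: dst's old value is never
+         consulted. */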
+ addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
+ addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
return;
case Ain_LoadEX:
addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
mapRegs_AMD64RM(m, i->Ain.CMov64.src);
mapReg(m, &i->Ain.CMov64.dst);
return;
- case Ain_MovZLQ:
- mapReg(m, &i->Ain.MovZLQ.src);
- mapReg(m, &i->Ain.MovZLQ.dst);
+ case Ain_MovxLQ:
+ mapReg(m, &i->Ain.MovxLQ.src);
+ mapReg(m, &i->Ain.MovxLQ.dst);
return;
case Ain_LoadEX:
mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
switch (i->tag) {
case Ain_Imm64:
- *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
- *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
- p = emit64(p, i->Ain.Imm64.imm64);
+ if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
+         /* Use the short form (load into 32 bit reg, + default
+            widening rule) for constants no bigger than 0xFFFFF.  We
+            could use this form for the range 0 to 0x7FFFFFFF
+            inclusive, but limit it to a smaller range for
+            verifiability purposes. */
+ if (1 & iregBit3(i->Ain.Imm64.dst))
+ *p++ = 0x41;
+         *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
+ p = emit32(p, (UInt)i->Ain.Imm64.imm64);
+ } else {
+ *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
+ *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
+ p = emit64(p, i->Ain.Imm64.imm64);
+ }
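+      /* Worked example (values hypothetical): imm64=0x7FF, dst=%rcx
+         takes the short form and emits B9 FF 07 00 00 (5 bytes; the
+         high half of %rcx is zeroed by the widening rule), whereas
+         imm64=0x123456789A, dst=%rcx takes the long form and emits
+         48 B9 9A 78 56 34 12 00 00 00 (10 bytes). */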
goto done;
case Ain_Alu64R:
if (i->Ain.Alu64R.op == Aalu_MOV) {
switch (i->Ain.Alu64R.src->tag) {
case Armi_Imm:
- if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFF)) {
+ if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
/* Actually we could use this form for constants in
the range 0 through 0x7FFFFFFF inclusive, but
limit it to a small range for verifiability
}
break;
- case Ain_MovZLQ:
- /* Produce a 32-bit reg-reg move, since the implicit zero-extend
- does what we want. */
- *p++ = clearWBit (
- rexAMode_R(i->Ain.MovZLQ.src, i->Ain.MovZLQ.dst));
- *p++ = 0x89;
- p = doAMode_R(p, i->Ain.MovZLQ.src, i->Ain.MovZLQ.dst);
+ case Ain_MovxLQ:
+      /* The sense of the args differs between the S and Z cases
+         because the two opcodes put different operands in the ModRM
+         reg field: 0x63 (MOVSXD r64, r/m32) encodes the destination
+         there, whereas 0x89 (MOV r/m32, r32) encodes the source. */
+ if (i->Ain.MovxLQ.syned) {
+ /* Need REX.W = 1 here, but rexAMode_R does that for us. */
+ *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
+ *p++ = 0x63;
+ p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
+ } else {
+ /* Produce a 32-bit reg-reg move, since the implicit
+ zero-extend does what we want. */
+ *p++ = clearWBit (
+ rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
+ *p++ = 0x89;
+ p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
+ }
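+      /* Worked example (registers hypothetical): sign-extending %eax
+         into %rbx emits 48 63 D8 (movslq %eax,%rbx); zero-extending
+         the same pair emits 40 89 C3, a 32-bit movl %eax,%ebx carrying
+         an empty REX prefix, since this path writes the REX byte even
+         when clearWBit reduces it to plain 0x40. */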
goto done;
case Ain_LoadEX: