2 * i386 micro operations (included several times to generate
3 * different operand sizes)
5 * Copyright (c) 2003 Fabrice Bellard
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* Operand width in bits, derived from SHIFT as 1 << (3 + SHIFT). */
21 #define DATA_BITS (1 << (3 + SHIFT))
/* Mask applied to shift/rotate/bit-test counts to keep them below DATA_BITS. */
22 #define SHIFT_MASK (DATA_BITS - 1)
/* Most significant (sign) bit of an operand of DATA_BITS bits.
   NOTE(review): when DATA_BITS is 32 this evaluates 1 << 31 on a plain int,
   which overflows if int is 32 bits wide -- an unsigned constant would avoid
   that; confirm against the target compilers before changing. */
23 #define SIGN_MASK (1 << (DATA_BITS - 1))
/* Per-operand-size definitions: unsigned type, signed type and value mask
   for 8-, 16- and 32-bit operands.  The preprocessor conditionals that select
   one group (and guard the #error below) are not visible in this excerpt. */
27 #define DATA_TYPE uint8_t
28 #define DATA_STYPE int8_t
29 #define DATA_MASK 0xff
32 #define DATA_TYPE uint16_t
33 #define DATA_STYPE int16_t
34 #define DATA_MASK 0xffff
37 #define DATA_TYPE uint32_t
38 #define DATA_STYPE int32_t
39 #define DATA_MASK 0xffffffff
41 #error unhandled operand size
44 /* dynamic flags computation */
/* Builds the complete flag word for an addition from CC_SRC and CC_DST.
   Returns the OR of cf, pf (parity_table lookup on the low byte),
   af (0x10), zf (bit 6), sf (0x80) and of (masked with CC_O).
   NOTE(review): the declaration and assignment of src1 are not visible in
   this excerpt. */
46 static int glue(compute_all_add, SUFFIX)(void)
48 int cf, pf, af, zf, sf, of;
/* recover the second operand from the stored result */
51 src2 = CC_DST - CC_SRC;
/* carry: the result truncated to the operand size is below src1 */
52 cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
53 pf = parity_table[(uint8_t)CC_DST];
/* auxiliary carry: bit 4 of result ^ operands */
54 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag is set only when the truncated result is 0 */
55 zf = ((DATA_TYPE)CC_DST == 0) << 6;
56 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: operands agree in sign (src1 ^ src2 ^ -1) and the result
   differs in sign from src1 */
57 of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
58 return cf | pf | af | zf | sf | of;
/* Carry-only variant for addition: carry is set when the result truncated
   to the operand size is below src1 (src1's assignment is not visible in
   this excerpt). */
61 static int glue(compute_c_add, SUFFIX)(void)
65 cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
/* Builds the complete flag word for an add-with-carry whose carry-in was 1.
   Differs from the plain add case in two places: src2 is recovered with an
   extra "- 1", and the carry test uses <= instead of <.
   NOTE(review): the declaration and assignment of src1 are not visible in
   this excerpt. */
69 static int glue(compute_all_adc, SUFFIX)(void)
71 int cf, pf, af, zf, sf, of;
/* second operand = result - first operand - carry-in of 1 */
74 src2 = CC_DST - CC_SRC - 1;
/* with the extra 1 added, a result equal to src1 also means a carry out */
75 cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
76 pf = parity_table[(uint8_t)CC_DST];
77 af = (CC_DST ^ src1 ^ src2) & 0x10;
78 zf = ((DATA_TYPE)CC_DST == 0) << 6;
79 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
80 of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
81 return cf | pf | af | zf | sf | of;
/* Carry-only variant for add-with-carry: carry is set when the truncated
   result is below or equal to src1 (src1's assignment is not visible in
   this excerpt). */
84 static int glue(compute_c_adc, SUFFIX)(void)
88 cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
/* Builds the complete flag word for a subtraction from CC_SRC and CC_DST.
   Returns the OR of cf, pf, af (0x10), zf (bit 6), sf (0x80) and of (CC_O).
   NOTE(review): the declaration and assignment of src1 are not visible in
   this excerpt. */
92 static int glue(compute_all_sub, SUFFIX)(void)
94 int cf, pf, af, zf, sf, of;
/* recover the subtrahend from the stored values */
97 src2 = CC_SRC - CC_DST;
/* borrow: unsigned src1 is smaller than unsigned src2 */
98 cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
99 pf = parity_table[(uint8_t)CC_DST];
100 af = (CC_DST ^ src1 ^ src2) & 0x10;
101 zf = ((DATA_TYPE)CC_DST == 0) << 6;
102 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: operands differ in sign and the result differs in sign from src1 */
103 of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
104 return cf | pf | af | zf | sf | of;
/* Carry (borrow) only variant for subtraction: set when unsigned src1 is
   below unsigned src2 (src1's assignment is not visible in this excerpt). */
107 static int glue(compute_c_sub, SUFFIX)(void)
111 src2 = CC_SRC - CC_DST;
112 cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
/* Builds the complete flag word for a subtract-with-borrow whose borrow-in
   was 1.  Differs from the plain sub case in two places: src2 is recovered
   with an extra "- 1", and the borrow test uses <= instead of <.
   NOTE(review): the declaration and assignment of src1 are not visible in
   this excerpt. */
116 static int glue(compute_all_sbb, SUFFIX)(void)
118 int cf, pf, af, zf, sf, of;
121 src2 = CC_SRC - CC_DST - 1;
/* with the extra 1 subtracted, equal operands also produce a borrow */
122 cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
123 pf = parity_table[(uint8_t)CC_DST];
124 af = (CC_DST ^ src1 ^ src2) & 0x10;
125 zf = ((DATA_TYPE)CC_DST == 0) << 6;
126 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
127 of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
128 return cf | pf | af | zf | sf | of;
/* Carry (borrow) only variant for subtract-with-borrow: set when unsigned
   src1 is below or equal to unsigned src2 (src1's assignment is not visible
   in this excerpt). */
131 static int glue(compute_c_sbb, SUFFIX)(void)
135 src2 = CC_SRC - CC_DST - 1;
136 cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
/* Builds the flag word after a logical operation.  pf, zf and sf are derived
   from CC_DST exactly as in the arithmetic helpers; the assignments of cf,
   af and of are not visible in this excerpt. */
140 static int glue(compute_all_logic, SUFFIX)(void)
142 int cf, pf, af, zf, sf, of;
144 pf = parity_table[(uint8_t)CC_DST];
146 zf = ((DATA_TYPE)CC_DST == 0) << 6;
147 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
149 return cf | pf | af | zf | sf | of;
152 static int glue(compute_c_logic, SUFFIX)(void)
/* Builds the flag word after an increment.  The assignments of cf, src1 and
   src2 are not visible in this excerpt. */
157 static int glue(compute_all_inc, SUFFIX)(void)
159 int cf, pf, af, zf, sf, of;
164 pf = parity_table[(uint8_t)CC_DST];
165 af = (CC_DST ^ src1 ^ src2) & 0x10;
166 zf = ((DATA_TYPE)CC_DST == 0) << 6;
167 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow (bit 11) only when the result is exactly SIGN_MASK, i.e. the
   increment stepped from the largest positive value into the sign bit */
168 of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
169 return cf | pf | af | zf | sf | of;
173 static int glue(compute_c_inc, SUFFIX)(void)
/* Builds the flag word after a decrement.  The assignments of cf, src1 and
   src2 are not visible in this excerpt. */
179 static int glue(compute_all_dec, SUFFIX)(void)
181 int cf, pf, af, zf, sf, of;
186 pf = parity_table[(uint8_t)CC_DST];
187 af = (CC_DST ^ src1 ^ src2) & 0x10;
188 zf = ((DATA_TYPE)CC_DST == 0) << 6;
189 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow (bit 11) only when the result is exactly SIGN_MASK - 1, i.e. the
   decrement stepped from the most negative value to the largest positive;
   the uint32_t cast keeps the subtraction unsigned */
190 of = ((CC_DST & DATA_MASK) == ((uint32_t)SIGN_MASK - 1)) << 11;
191 return cf | pf | af | zf | sf | of;
/* Builds the flag word after a left shift.  CC_SRC is expected to hold a
   value whose top operand bit is the last bit shifted out; CC_DST holds the
   result. */
194 static int glue(compute_all_shl, SUFFIX)(void)
196 int cf, pf, af, zf, sf, of;
/* carry = bit (DATA_BITS - 1) of CC_SRC */
197 cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
198 pf = parity_table[(uint8_t)CC_DST];
199 af = 0; /* undefined */
200 zf = ((DATA_TYPE)CC_DST == 0) << 6;
201 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
202 /* of is defined if shift count == 1 */
/* taken from the sign-bit difference between CC_SRC and CC_DST */
203 of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
204 return cf | pf | af | zf | sf | of;
208 static int glue(compute_c_shl, SUFFIX)(void)
/* Builds the flag word after a right shift.  pf, zf and sf come from CC_DST;
   the assignment of cf is not visible in this excerpt. */
214 static int glue(compute_all_sar, SUFFIX)(void)
216 int cf, pf, af, zf, sf, of;
218 pf = parity_table[(uint8_t)CC_DST];
219 af = 0; /* undefined */
220 zf = ((DATA_TYPE)CC_DST == 0) << 6;
221 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
222 /* of is defined if shift count == 1 */
/* taken from the sign-bit difference between CC_SRC and CC_DST */
223 of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
224 return cf | pf | af | zf | sf | of;
227 /* various optimized jumps cases */
229 void OPPROTO glue(op_jb_sub, SUFFIX)(void)
233 src2 = CC_SRC - CC_DST;
235 if ((DATA_TYPE)src1 < (DATA_TYPE)src2)
242 void OPPROTO glue(op_jz_sub, SUFFIX)(void)
244 if ((DATA_TYPE)CC_DST == 0)
251 void OPPROTO glue(op_jbe_sub, SUFFIX)(void)
255 src2 = CC_SRC - CC_DST;
257 if ((DATA_TYPE)src1 <= (DATA_TYPE)src2)
264 void OPPROTO glue(op_js_sub, SUFFIX)(void)
266 if (CC_DST & SIGN_MASK)
273 void OPPROTO glue(op_jl_sub, SUFFIX)(void)
277 src2 = CC_SRC - CC_DST;
279 if ((DATA_STYPE)src1 < (DATA_STYPE)src2)
286 void OPPROTO glue(op_jle_sub, SUFFIX)(void)
290 src2 = CC_SRC - CC_DST;
292 if ((DATA_STYPE)src1 <= (DATA_STYPE)src2)
/* Evaluates the current flags, decrements the low DATA_BITS of ECX and
   tests "count is non-zero and CC_Z is clear".  The action taken on each
   outcome is not visible in this excerpt. */
303 void OPPROTO glue(op_loopnz, SUFFIX)(void)
307 eflags = cc_table[CC_OP].compute_all();
/* decrement within the operand width only */
308 tmp = (ECX - 1) & DATA_MASK;
/* write the count back while preserving the ECX bits above DATA_MASK */
309 ECX = (ECX & ~DATA_MASK) | tmp;
310 if (tmp != 0 && !(eflags & CC_Z))
/* Evaluates the current flags, decrements the low DATA_BITS of ECX and
   tests "count is non-zero and CC_Z is set".  The action taken on each
   outcome is not visible in this excerpt. */
317 void OPPROTO glue(op_loopz, SUFFIX)(void)
321 eflags = cc_table[CC_OP].compute_all();
/* decrement within the operand width only */
322 tmp = (ECX - 1) & DATA_MASK;
/* write the count back while preserving the ECX bits above DATA_MASK */
323 ECX = (ECX & ~DATA_MASK) | tmp;
324 if (tmp != 0 && (eflags & CC_Z))
/* Decrements the low DATA_BITS of ECX, leaving the bits above DATA_MASK
   unchanged.  The test on the new count is not visible in this excerpt. */
331 void OPPROTO glue(op_loop, SUFFIX)(void)
334 tmp = (ECX - 1) & DATA_MASK;
335 ECX = (ECX & ~DATA_MASK) | tmp;
343 void OPPROTO glue(op_jecxz, SUFFIX)(void)
345 if ((DATA_TYPE)ECX == 0)
354 /* various optimized set cases */
356 void OPPROTO glue(op_setb_T0_sub, SUFFIX)(void)
360 src2 = CC_SRC - CC_DST;
362 T0 = ((DATA_TYPE)src1 < (DATA_TYPE)src2);
365 void OPPROTO glue(op_setz_T0_sub, SUFFIX)(void)
367 T0 = ((DATA_TYPE)CC_DST == 0);
370 void OPPROTO glue(op_setbe_T0_sub, SUFFIX)(void)
374 src2 = CC_SRC - CC_DST;
376 T0 = ((DATA_TYPE)src1 <= (DATA_TYPE)src2);
379 void OPPROTO glue(op_sets_T0_sub, SUFFIX)(void)
381 T0 = lshift(CC_DST, -(DATA_BITS - 1)) & 1;
384 void OPPROTO glue(op_setl_T0_sub, SUFFIX)(void)
388 src2 = CC_SRC - CC_DST;
390 T0 = ((DATA_STYPE)src1 < (DATA_STYPE)src2);
393 void OPPROTO glue(op_setle_T0_sub, SUFFIX)(void)
397 src2 = CC_SRC - CC_DST;
399 T0 = ((DATA_STYPE)src1 <= (DATA_STYPE)src2);
404 void OPPROTO glue(glue(op_rol, SUFFIX), _T0_T1_cc)(void)
407 count = T1 & SHIFT_MASK;
409 CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C);
412 T0 = (T0 << count) | (T0 >> (DATA_BITS - count));
413 CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
415 CC_OP = CC_OP_EFLAGS;
420 void OPPROTO glue(glue(op_rol, SUFFIX), _T0_T1)(void)
423 count = T1 & SHIFT_MASK;
426 T0 = (T0 << count) | (T0 >> (DATA_BITS - count));
431 void OPPROTO glue(glue(op_ror, SUFFIX), _T0_T1_cc)(void)
434 count = T1 & SHIFT_MASK;
436 CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C);
439 T0 = (T0 >> count) | (T0 << (DATA_BITS - count));
440 CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
441 ((T0 >> (DATA_BITS - 1)) & CC_C);
442 CC_OP = CC_OP_EFLAGS;
447 void OPPROTO glue(glue(op_ror, SUFFIX), _T0_T1)(void)
450 count = T1 & SHIFT_MASK;
453 T0 = (T0 >> count) | (T0 << (DATA_BITS - count));
458 void OPPROTO glue(glue(op_rcl, SUFFIX), _T0_T1_cc)(void)
460 int count, res, eflags;
465 count = rclw_table[count];
467 count = rclb_table[count];
470 eflags = cc_table[CC_OP].compute_all();
473 res = (T0 << count) | ((eflags & CC_C) << (count - 1));
475 res |= T0 >> (DATA_BITS + 1 - count);
477 CC_SRC = (eflags & ~(CC_C | CC_O)) |
478 (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
479 ((src >> (DATA_BITS - count)) & CC_C);
480 CC_OP = CC_OP_EFLAGS;
485 void OPPROTO glue(glue(op_rcr, SUFFIX), _T0_T1_cc)(void)
487 int count, res, eflags;
492 count = rclw_table[count];
494 count = rclb_table[count];
497 eflags = cc_table[CC_OP].compute_all();
500 res = (T0 >> count) | ((eflags & CC_C) << (DATA_BITS - count));
502 res |= T0 << (DATA_BITS + 1 - count);
504 CC_SRC = (eflags & ~(CC_C | CC_O)) |
505 (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
506 ((src >> (count - 1)) & CC_C);
507 CC_OP = CC_OP_EFLAGS;
512 void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1_cc)(void)
517 CC_SRC = (DATA_TYPE)T0 << (count - 1);
520 CC_OP = CC_OP_SHLB + SHIFT;
525 void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1)(void)
533 void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1_cc)(void)
539 CC_SRC = T0 >> (count - 1);
542 CC_OP = CC_OP_SARB + SHIFT;
547 void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1)(void)
556 void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1_cc)(void)
561 src = (DATA_STYPE)T0;
562 CC_SRC = src >> (count - 1);
565 CC_OP = CC_OP_SARB + SHIFT;
570 void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1)(void)
574 src = (DATA_STYPE)T0;
580 /* XXX: overflow flag might be incorrect in some cases in shldw */
581 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_im_cc)(void)
587 res = T1 | (T0 << 16);
588 CC_SRC = res >> (32 - count);
591 res |= T1 << (count - 16);
596 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_ECX_cc)(void)
603 res = T1 | (T0 << 16);
604 CC_SRC = res >> (32 - count);
607 res |= T1 << (count - 16);
610 CC_OP = CC_OP_SARB + SHIFT;
614 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_im_cc)(void)
620 res = (T0 & 0xffff) | (T1 << 16);
621 CC_SRC = res >> (count - 1);
624 res |= T1 << (32 - count);
630 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_ECX_cc)(void)
637 res = (T0 & 0xffff) | (T1 << 16);
638 CC_SRC = res >> (count - 1);
641 res |= T1 << (32 - count);
644 CC_OP = CC_OP_SARB + SHIFT;
650 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_im_cc)(void)
656 CC_SRC = T0 << (count - 1);
657 T0 = (T0 << count) | (T1 >> (DATA_BITS - count));
661 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_ECX_cc)(void)
668 CC_SRC = T0 << (count - 1);
669 T0 = (T0 << count) | (T1 >> (DATA_BITS - count));
671 CC_OP = CC_OP_SHLB + SHIFT;
675 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_im_cc)(void)
681 CC_SRC = T0 >> (count - 1);
682 T0 = (T0 >> count) | (T1 << (DATA_BITS - count));
687 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_ECX_cc)(void)
694 CC_SRC = T0 >> (count - 1);
695 T0 = (T0 >> count) | (T1 << (DATA_BITS - count));
697 CC_OP = CC_OP_SARB + SHIFT;
702 /* carry add/sub (we only need to set CC_OP differently) */
/* Add-with-carry that also updates the lazy flag state.  The incoming carry
   is obtained through the current CC_OP's compute_c(); the arithmetic itself
   is not visible in this excerpt. */
704 void OPPROTO glue(glue(op_adc, SUFFIX), _T0_T1_cc)(void)
707 cf = cc_table[CC_OP].compute_c();
/* cf * 3 advances the CC_OP index by three entries when the carry-in is
   set -- presumably onto the adc group; confirm against the CC_OP enum */
711 CC_OP = CC_OP_ADDB + SHIFT + cf * 3;
/* Subtract-with-borrow that also updates the lazy flag state.  The incoming
   carry is obtained through the current CC_OP's compute_c(); the arithmetic
   itself is not visible in this excerpt. */
714 void OPPROTO glue(glue(op_sbb, SUFFIX), _T0_T1_cc)(void)
717 cf = cc_table[CC_OP].compute_c();
/* cf * 3 advances the CC_OP index by three entries when the carry-in is
   set -- presumably onto the sbb group; confirm against the CC_OP enum */
721 CC_OP = CC_OP_SUBB + SHIFT + cf * 3;
724 void OPPROTO glue(glue(op_cmpxchg, SUFFIX), _T0_T1_EAX_cc)(void)
728 if ((DATA_TYPE)CC_DST == 0) {
731 EAX = (EAX & ~DATA_MASK) | (T0 & DATA_MASK);
739 void OPPROTO glue(glue(op_bt, SUFFIX), _T0_T1_cc)(void)
742 count = T1 & SHIFT_MASK;
743 CC_SRC = T0 >> count;
746 void OPPROTO glue(glue(op_bts, SUFFIX), _T0_T1_cc)(void)
749 count = T1 & SHIFT_MASK;
750 CC_SRC = T0 >> count;
754 void OPPROTO glue(glue(op_btr, SUFFIX), _T0_T1_cc)(void)
757 count = T1 & SHIFT_MASK;
758 CC_SRC = T0 >> count;
762 void OPPROTO glue(glue(op_btc, SUFFIX), _T0_T1_cc)(void)
765 count = T1 & SHIFT_MASK;
766 CC_SRC = T0 >> count;
/* Bit scan forward on T0 restricted to the operand width: walks up from
   bit 0 until a set bit is found.  CC_DST is then set to 1 or 0 so that a
   later flag computation of the form "zf = (CC_DST == 0)" (as used by the
   compute_all_* helpers) yields the zero flag.  The branch structure around
   the two CC_DST assignments is not visible in this excerpt. */
770 void OPPROTO glue(glue(op_bsf, SUFFIX), _T0_cc)(void)
773 res = T0 & DATA_MASK;
776 while ((res & 1) == 0) {
781 CC_DST = 1; /* ZF = 0 (non-zero CC_DST clears the zero flag) */
783 CC_DST = 0; /* ZF = 1 */
/* Bit scan reverse on T0 restricted to the operand width: starts at bit
   DATA_BITS - 1 and walks down while the SIGN_MASK bit of res is clear.
   CC_DST is then set to 1 or 0 so that a later flag computation of the form
   "zf = (CC_DST == 0)" (as used by the compute_all_* helpers) yields the
   zero flag.  The branch structure around the two CC_DST assignments is not
   visible in this excerpt. */
788 void OPPROTO glue(glue(op_bsr, SUFFIX), _T0_cc)(void)
791 res = T0 & DATA_MASK;
793 count = DATA_BITS - 1;
794 while ((res & SIGN_MASK) == 0) {
799 CC_DST = 1; /* ZF = 0 (non-zero CC_DST clears the zero flag) */
801 CC_DST = 0; /* ZF = 1 */
808 /* string operations */
809 /* XXX: maybe use lower level instructions to ease exception handling */
811 void OPPROTO glue(op_movs, SUFFIX)(void)
814 v = glue(ldu, SUFFIX)((void *)ESI);
815 glue(st, SUFFIX)((void *)EDI, v);
816 ESI += (DF << SHIFT);
817 EDI += (DF << SHIFT);
820 void OPPROTO glue(op_rep_movs, SUFFIX)(void)
825 v = glue(ldu, SUFFIX)((void *)ESI);
826 glue(st, SUFFIX)((void *)EDI, v);
834 void OPPROTO glue(op_stos, SUFFIX)(void)
836 glue(st, SUFFIX)((void *)EDI, EAX);
837 EDI += (DF << SHIFT);
840 void OPPROTO glue(op_rep_stos, SUFFIX)(void)
845 glue(st, SUFFIX)((void *)EDI, EAX);
852 void OPPROTO glue(op_lods, SUFFIX)(void)
855 v = glue(ldu, SUFFIX)((void *)ESI);
857 EAX = (EAX & ~0xff) | v;
859 EAX = (EAX & ~0xffff) | v;
863 ESI += (DF << SHIFT);
866 /* don't know if it is used */
867 void OPPROTO glue(op_rep_lods, SUFFIX)(void)
872 v = glue(ldu, SUFFIX)((void *)ESI);
874 EAX = (EAX & ~0xff) | v;
876 EAX = (EAX & ~0xffff) | v;
886 void OPPROTO glue(op_scas, SUFFIX)(void)
890 v = glue(ldu, SUFFIX)((void *)EDI);
891 EDI += (DF << SHIFT);
896 void OPPROTO glue(op_repz_scas, SUFFIX)(void)
901 /* NOTE: the flags are not modified if ECX == 0 */
902 v1 = EAX & DATA_MASK;
905 v2 = glue(ldu, SUFFIX)((void *)EDI);
913 CC_OP = CC_OP_SUBB + SHIFT;
918 void OPPROTO glue(op_repnz_scas, SUFFIX)(void)
923 /* NOTE: the flags are not modified if ECX == 0 */
924 v1 = EAX & DATA_MASK;
927 v2 = glue(ldu, SUFFIX)((void *)EDI);
935 CC_OP = CC_OP_SUBB + SHIFT;
940 void OPPROTO glue(op_cmps, SUFFIX)(void)
943 v1 = glue(ldu, SUFFIX)((void *)ESI);
944 v2 = glue(ldu, SUFFIX)((void *)EDI);
945 ESI += (DF << SHIFT);
946 EDI += (DF << SHIFT);
951 void OPPROTO glue(op_repz_cmps, SUFFIX)(void)
957 v1 = glue(ldu, SUFFIX)((void *)ESI);
958 v2 = glue(ldu, SUFFIX)((void *)EDI);
967 CC_OP = CC_OP_SUBB + SHIFT;
972 void OPPROTO glue(op_repnz_cmps, SUFFIX)(void)
978 v1 = glue(ldu, SUFFIX)((void *)ESI);
979 v2 = glue(ldu, SUFFIX)((void *)EDI);
988 CC_OP = CC_OP_SUBB + SHIFT;
995 void OPPROTO glue(op_outs, SUFFIX)(void)
999 v = glue(ldu, SUFFIX)((void *)ESI);
1000 glue(cpu_x86_out, SUFFIX)(dx, v);
1001 ESI += (DF << SHIFT);
1004 void OPPROTO glue(op_rep_outs, SUFFIX)(void)
1007 inc = (DF << SHIFT);
1010 v = glue(ldu, SUFFIX)((void *)ESI);
1011 glue(cpu_x86_out, SUFFIX)(dx, v);
1018 void OPPROTO glue(op_ins, SUFFIX)(void)
1022 v = glue(cpu_x86_in, SUFFIX)(dx);
1023 glue(st, SUFFIX)((void *)EDI, v);
1024 EDI += (DF << SHIFT);
/* Repeated port input: reads a value with cpu_x86_in for the port in dx,
   stores it at EDI and advances EDI by DF << SHIFT.  The loop construct and
   the counter update are not visible in this excerpt. */
1027 void OPPROTO glue(op_rep_ins, SUFFIX)(void)
1030 inc = (DF << SHIFT);
1033 v = glue(cpu_x86_in, SUFFIX)(dx);
1034 glue(st, SUFFIX)((void *)EDI, v);
/* NOTE(review): inc above holds the same value, yet this update recomputes
   DF << SHIFT instead of using it -- check whether inc is used at all */
1035 EDI += (DF << SHIFT);
1041 void OPPROTO glue(glue(op_out, SUFFIX), _T0_T1)(void)
1043 glue(cpu_x86_out, SUFFIX)(T0 & 0xffff, T1 & DATA_MASK);
1046 void OPPROTO glue(glue(op_in, SUFFIX), _T0_T1)(void)
1048 T1 = glue(cpu_x86_in, SUFFIX)(T0 & 0xffff);