5 #include "tests/malloc.h"
/* 128-bit patterns used as SSE register contents: do_setup_then_fxsave
   loads vec0 into %xmm0 and vec1 into %xmm1 with movups. */
8 const unsigned int vec0[4]
9 = { 0x12345678, 0x11223344, 0x55667788, 0x87654321 };
11 const unsigned int vec1[4]
12 = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA };
/* Source operand of the movups loads into %xmm0..%xmm15 that follow
   do_zeroise's finit.
   NOTE(review): presumably all zeroes, going by the name -- the
   initializer is not visible here, confirm. */
14 const unsigned int vecZ[4]
/* Execute FXSAVE with p as its memory operand, i.e. dump the current
   x87/SSE state into the save area at p.

   p    - destination save area.  main() allocates its save areas as
          512 bytes via memalign16().
   rexw - NOTE(review): presumably selects the "rex64/fxsave" encoding
          when nonzero and plain "fxsave" otherwise (main's banners label
          the rexw=0 pass "REX.W == 0" and the rexw=1 pass "REX.W == 1");
          the branch itself is not visible here -- confirm.

   p is passed only as an input register operand, so the "memory"
   clobber is what tells the compiler that the instruction writes
   memory. */
17 __attribute__((noinline))
18 void do_fxsave ( void* p, int rexw ) {
/* FXSAVE with the rex64 prefix. */
20 asm __volatile__("rex64/fxsave (%0)" : : "r" (p) : "memory" );
/* FXSAVE without the prefix. */
22 asm __volatile__("fxsave (%0)" : : "r" (p) : "memory" );
/* Execute FXRSTOR with p as its memory operand, i.e. reload the x87/SSE
   state from the save area at p.

   p    - source save area, expected to hold an image previously written
          by FXSAVE (main's comments describe reloading from buf1).
   rexw - NOTE(review): presumably selects the "rex64/fxrstor" encoding
          when nonzero and plain "fxrstor" otherwise; the branch itself
          is not visible here -- confirm.

   The asm statements name no register outputs or clobbers even though
   the instruction replaces the FP/SSE register contents; only "memory"
   is declared. */
26 __attribute__((noinline))
27 void do_fxrstor ( void* p, int rexw ) {
/* FXRSTOR with the rex64 prefix. */
29 asm __volatile__("rex64/fxrstor (%0)" : : "r" (p) : "memory" );
/* FXRSTOR without the prefix. */
31 asm __volatile__("fxrstor (%0)" : : "r" (p) : "memory" );
/* Clear out the x87 and SSE register state: finit re-initialises the
   x87 unit, then every one of %xmm0..%xmm15 is overwritten with a
   16-byte movups load from vecZ.
   NOTE(review): that this leaves the xmm registers zero depends on
   vecZ's contents, which are not visible here -- confirm.

   None of the asm statements below declare operands or clobbers. */
35 void do_zeroise ( void )
37 asm __volatile__("finit");
/* NOTE(review): further x87 instructions may sit between the finit
   above and this point; they are not visible here. */
/* The sixteen loads are written twice, selected on VGP_amd64_darwin:
   first with VG_SYM(vecZ) as a bare memory operand, then with the same
   symbol addressed relative to %rip.  VG_SYM is defined elsewhere.
   NOTE(review): the #else / #endif that separate and close the two
   groups are not visible here. */
48 #ifndef VGP_amd64_darwin
49 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0");
50 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1");
51 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2");
52 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm3");
53 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm4");
54 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm5");
55 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm6");
56 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm7");
57 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm8");
58 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm9");
59 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm10");
60 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm11");
61 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm12");
62 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm13");
63 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm14");
64 asm __volatile__("movups " VG_SYM(vecZ) ", %xmm15");
/* RIP-relative spelling of the same loads. */
66 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm0");
67 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm1");
68 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm2");
69 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm3");
70 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm4");
71 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm5");
72 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm6");
73 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm7");
74 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm8");
75 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm9");
76 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm10");
77 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm11");
78 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm12");
79 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm13");
80 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm14");
81 asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm15");
/* Put the x87 and SSE registers into a known, non-trivial state so that
   the subsequent dump has recognisable contents.

   p    - save area the state is to be dumped into.
   rexw - encoding selector for the dump (0 or 1 as passed by main).
   NOTE(review): the statement that actually performs the save into p
   is not visible here; presumably a call to do_fxsave(p, rexw) follows
   the last movaps -- confirm. */
89 /* set up the FP and SSE state, and then dump it. */
90 void do_setup_then_fxsave ( void* p, int rexw )
/* x87: start from the initialised state, then push seven values
   (pi, 1.0, ln 2, log10 2, two copies of whatever is in %st(3) at that
   moment, and 1.0 again). */
92 asm __volatile__("finit");
93 asm __volatile__("fldpi");
94 asm __volatile__("fld1");
95 asm __volatile__("fldln2");
96 asm __volatile__("fldlg2");
97 asm __volatile__("fld %st(3)");
98 asm __volatile__("fld %st(3)");
99 asm __volatile__("fld1");
/* SSE: %xmm0 <- vec0, %xmm1 <- vec1, %xmm2 <- 0 (xorps with itself);
   %xmm3..%xmm15 then receive copies of those three.  Only the two
   movups statements declare a clobber; the register-to-register
   statements declare nothing. */
100 asm __volatile__("movups (%0), %%xmm0" : : "r"(&vec0[0]) : "xmm0" );
101 asm __volatile__("movups (%0), %%xmm1" : : "r"(&vec1[0]) : "xmm1" );
102 asm __volatile__("xorps %xmm2, %xmm2");
103 asm __volatile__("movaps %xmm0, %xmm3");
104 asm __volatile__("movaps %xmm1, %xmm4");
105 asm __volatile__("movaps %xmm2, %xmm5");
106 asm __volatile__("movaps %xmm0, %xmm6");
107 asm __volatile__("movaps %xmm1, %xmm7");
108 asm __volatile__("movaps %xmm1, %xmm8");
109 asm __volatile__("movaps %xmm2, %xmm9");
110 asm __volatile__("movaps %xmm0, %xmm10");
111 asm __volatile__("movaps %xmm1, %xmm11");
112 asm __volatile__("movaps %xmm1, %xmm12");
113 asm __volatile__("movaps %xmm2, %xmm13");
114 asm __volatile__("movaps %xmm0, %xmm14");
115 asm __volatile__("movaps %xmm1, %xmm15");
/* Return 1 if byte offset i is one of the first two bytes of a 16-byte
   slot starting at offset 32, 48, 64, 80, 96, 112, 128 or 144 -- that
   is, i is in {32,33, 48,49, ..., 144,145}.

   NOTE(review): going by the name and by main's banner ("suppress
   least-significant 16 bits of FP numbers"), these are presumably the
   low 16 bits of each of the eight x87 registers in the FXSAVE image --
   confirm against the FXSAVE layout.  The declaration of q and the
   fall-through return (presumably 0) are not visible here. */
119 int isFPLsbs ( int i )
/* One test per slot: q is the slot's starting offset. */
122 q = 32; if (i == q || i == q+1) return 1;
123 q = 48; if (i == q || i == q+1) return 1;
124 q = 64; if (i == q || i == q+1) return 1;
125 q = 80; if (i == q || i == q+1) return 1;
126 q = 96; if (i == q || i == q+1) return 1;
127 q = 112; if (i == q || i == q+1) return 1;
128 q = 128; if (i == q || i == q+1) return 1;
129 q = 144; if (i == q || i == q+1) return 1;
/* Print the 512 bytes at buf as two-digit hex values separated by
   spaces.

   buf - the bytes to print; 512 of them are read.
   xx  - when nonzero, bytes for which isFPLsbs(i) holds get special
         treatment.
         NOTE(review): presumably they are replaced by a placeholder
         rather than printed -- the body of that branch is not visible
         here, confirm. */
133 void show ( unsigned char* buf, int xx )
136 for (i = 0; i < 512; i++) {
139 if (xx && isFPLsbs(i))
142 printf("%02x ", buf[i]);
/* True after every 16th byte (i = 15, 31, ...).
   NOTE(review): presumably ends the output row -- the action is not
   visible here, confirm. */
143 if (i > 0 && ((i % 16) == 15))
/* Test driver.  Runs the same three-stage scenario twice -- once with
   rexw = 0 ("FXSAVE non-64") and once with rexw = 1 ("FXSAVE 64") --
   printing a BEFORE, ZEROED and RESTORED section for each.

   NOTE(review): the calls that zero, save, restore and print the
   buffers between the banners are not visible here, and neither is the
   way the "suppress" flag is derived from argc/argv; the existing
   comments below describe the intended steps. */
149 int main ( int argc, char** argv )
/* Three 512-byte save areas from memalign16().
   NOTE(review): presumably 16-byte aligned, going by the helper's
   name; its definition is not visible here, and the results are not
   checked on the lines shown. */
151 unsigned char* buf1 = memalign16(512);
152 unsigned char* buf2 = memalign16(512);
153 unsigned char* buf3 = memalign16(512);
155 printf("Re-run with any arg to suppress least-significant\n"
156 " 16 bits of FP numbers\n");
158 printf("\n-------- FXSAVE non-64 (REX.W == 0) --------\n");
/* Pre-fill every buffer with 0x55 before the pass. */
160 memset(buf1, 0x55, 512);
161 memset(buf2, 0x55, 512);
162 memset(buf3, 0x55, 512);
164 /* Load up x87/xmm state and dump it. */
165 do_setup_then_fxsave(buf1, 0);
166 printf("\nBEFORE\n");
169 /* Zeroise x87/xmm state and dump it, to show that the
170 regs have been cleared out. */
173 printf("\nZEROED\n");
176 /* Reload x87/xmm state from buf1 and dump it in buf3. */
179 printf("\nRESTORED\n");
182 printf("\n-------- FXSAVE 64 (REX.W == 1) --------\n\n");
/* Same pre-fill for the second pass. */
184 memset(buf1, 0x55, 512);
185 memset(buf2, 0x55, 512);
186 memset(buf3, 0x55, 512);
188 /* Load up x87/xmm state and dump it. */
189 do_setup_then_fxsave(buf1, 1);
190 printf("\nBEFORE\n");
193 /* Zeroise x87/xmm state and dump it, to show that the
194 regs have been cleared out. */
197 printf("\nZEROED\n");
200 /* Reload x87/xmm state from buf1 and dump it in buf3. */
203 printf("\nRESTORED\n");
/* Release the three save areas. */
207 free(buf1); free(buf2); free(buf3);