]> rtime.felk.cvut.cz Git - l4.git/blob - l4/pkg/valgrind/src/valgrind-3.6.0-svn/none/tests/amd64/pcmpstr64.c
update
[l4.git] / l4 / pkg / valgrind / src / valgrind-3.6.0-svn / none / tests / amd64 / pcmpstr64.c
1
2 /* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3    pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
4    aspect. */
5
6 #include <string.h>
7 #include <stdio.h>
8 #include <assert.h>
9
/* Short integer aliases used throughout this test. */
typedef  unsigned int   UInt;
typedef  signed int     Int;
typedef  unsigned char  UChar;
typedef  unsigned long long int ULong;
/* Boolean held in a single unsigned byte; the only values used are the
   False and True constants defined just below. */
typedef  UChar          Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
17
/* A 16-byte vector value.  The same storage can be viewed either as 16
   individual bytes (uChar) or as four UInt words (uInt); the two views
   overlap exactly only if UInt is 4 bytes wide, which this file assumes. */
//typedef  unsigned char  V128[16];
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;
25
/* Bit positions of the O, S, Z, A, C and P condition flags within a
   flags word.  The hardware test helpers in this file mask a value
   obtained via pushfq with 0x8D5, which is exactly the union of these
   six bit positions. */
#define SHIFT_O   11
#define SHIFT_S   7
#define SHIFT_Z   6
#define SHIFT_A   4
#define SHIFT_C   0
#define SHIFT_P   2

/* Single-bit masks corresponding to the positions above.  Note these
   are unsigned long long constants. */
#define MASK_O    (1ULL << SHIFT_O)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)
39
40
41 UInt clz32 ( UInt x )
42 {
43    Int y, m, n;
44    y = -(x >> 16);
45    m = (y >> 16) & 16;
46    n = 16 - m;
47    x = x >> m;
48    y = x - 0x100;
49    m = (y >> 16) & 8;
50    n = n + m;
51    x = x << m;
52    y = x - 0x1000;
53    m = (y >> 16) & 4;
54    n = n + m;
55    x = x << m;
56    y = x - 0x4000;
57    m = (y >> 16) & 2;
58    n = n + m;
59    x = x << m;
60    y = x >> 14;
61    m = y & ~(y >> 1);
62    return n + 2 - m;
63 }
64
65 UInt ctz32 ( UInt x )
66 {
67    return 32 - clz32((~x) & (x-1));
68 }
69
70 void expand ( V128* dst, char* summary )
71 {
72    Int i;
73    assert( strlen(summary) == 16 );
74    for (i = 0; i < 16; i++) {
75       UChar xx = 0;
76       UChar x = summary[15-i];
77       if      (x >= '0' && x <= '9') { xx = x - '0'; }
78       else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
79       else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
80       else assert(0);
81
82       assert(xx < 16);
83       xx = (xx << 4) | xx;
84       assert(xx < 256);
85       dst->uChar[i] = xx;
86    }
87 }
88
89 void try_istri ( char* which,
90                  UInt(*h_fn)(V128*,V128*),
91                  UInt(*s_fn)(V128*,V128*),
92                  char* summL, char* summR )
93 {
94    assert(strlen(which) == 2);
95    V128 argL, argR;
96    expand(&argL, summL);
97    expand(&argR, summR);
98    UInt h_res = h_fn(&argL, &argR);
99    UInt s_res = s_fn(&argL, &argR);
100    printf("istri %s  %s %s -> %08x %08x %s\n",
101           which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
102 }
103
104 UInt zmask_from_V128 ( V128* arg )
105 {
106    UInt i, res = 0;
107    for (i = 0; i < 16; i++) {
108       res |=  ((arg->uChar[i] == 0) ? 1 : 0) << i;
109    }
110    return res;
111 }
112
113 //////////////////////////////////////////////////////////
114 //                                                      //
115 //                       GENERAL                        //
116 //                                                      //
117 //////////////////////////////////////////////////////////
118
119
120 /* Given partial results from a pcmpXstrX operation (intRes1,
121    basically), generate an I format (index value for ECX) output, and
122    also the new OSZACP flags.
123 */
124 static
125 void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
126                                     /*OUT*/UInt* resOSZACP,
127                                     UInt intRes1,
128                                     UInt zmaskL, UInt zmaskR,
129                                     UInt validL,
130                                     UInt pol, UInt idx )
131 {
132    assert((pol >> 2) == 0);
133    assert((idx >> 1) == 0);
134
135    UInt intRes2 = 0;
136    switch (pol) {
137       case 0: intRes2 = intRes1;          break; // pol +
138       case 1: intRes2 = ~intRes1;         break; // pol -
139       case 2: intRes2 = intRes1;          break; // pol m+
140       case 3: intRes2 = intRes1 ^ validL; break; // pol m-
141    }
142    intRes2 &= 0xFFFF;
143
144    // generate ecx value
145    UInt newECX = 0;
146    if (idx) {
147      // index of ms-1-bit
148      newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
149    } else {
150      // index of ls-1-bit
151      newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
152    }
153
154    *(UInt*)(&resV[0]) = newECX;
155
156    // generate new flags, common to all ISTRI and ISTRM cases
157    *resOSZACP    // A, P are zero
158      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
159      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
160      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
161      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
162 }
163
164
165 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
166    variants.
167
168    For xSTRI variants, the new ECX value is placed in the 32 bits
169    pointed to by *resV.  For xSTRM variants, the result is a 128 bit
170    value and is placed at *resV in the obvious way.
171
172    For all variants, the new OSZACP value is placed at *resOSZACP.
173
174    argLV and argRV are the vector args.  The caller must prepare a
175    16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
176    must be 1 for each zero byte of of the respective arg.  For ESTRx
177    variants this is derived from the explicit length indication, and
178    must be 0 in all places except at the bit index corresponding to
179    the valid length (0 .. 16).  If the valid length is 16 then the
180    mask must be all zeroes.  In all cases, bits 31:16 must be zero.
181
182    imm8 is the original immediate from the instruction.  isSTRM
183    indicates whether this is a xSTRM or xSTRI variant, which controls
184    how much of *res is written.
185
186    If the given imm8 case can be handled, the return value is True.
187    If not, False is returned, and neither *res not *resOSZACP are
188    altered.
189 */
190
191 Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
192                      /*OUT*/UInt* resOSZACP,
193                      V128* argLV,  V128* argRV,
194                      UInt zmaskL, UInt zmaskR,
195                      UInt imm8,   Bool isSTRM )
196 {
197    assert(imm8 < 0x80);
198    assert((zmaskL >> 16) == 0);
199    assert((zmaskR >> 16) == 0);
200
201    /* Explicitly reject any imm8 values that haven't been validated,
202       even if they would probably work.  Life is too short to have
203       unvalidated cases in the code base. */
204    switch (imm8) {
205       case 0x00:
206       case 0x02: case 0x08: case 0x0C: case 0x12: case 0x1A:
207       case 0x3A: case 0x44: case 0x4A:
208          break;
209       default:
210          return False;
211    }
212
213    UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
214    UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
215    UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
216    UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
217
218    /*----------------------------------------*/
219    /*-- strcmp on byte data                --*/
220    /*----------------------------------------*/
221
222    if (agg == 2/*equal each, aka strcmp*/
223        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
224        && !isSTRM) {
225       Int    i;
226       UChar* argL = (UChar*)argLV;
227       UChar* argR = (UChar*)argRV;
228       UInt boolResII = 0;
229       for (i = 15; i >= 0; i--) {
230          UChar cL  = argL[i];
231          UChar cR  = argR[i];
232          boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
233       }
234       UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
235       UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
236
237       // do invalidation, common to all equal-each cases
238       UInt intRes1
239          = (boolResII & validL & validR)  // if both valid, use cmpres
240            | (~ (validL | validR));       // if both invalid, force 1
241                                           // else force 0
242       intRes1 &= 0xFFFF;
243
244       // generate I-format output
245       pcmpXstrX_WRK_gen_output_fmt_I(
246          resV, resOSZACP,
247          intRes1, zmaskL, zmaskR, validL, pol, idx
248       );
249
250       return True;
251    }
252
253    /*----------------------------------------*/
254    /*-- set membership on byte data        --*/
255    /*----------------------------------------*/
256
257    if (agg == 0/*equal any, aka find chars in a set*/
258        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
259        && !isSTRM) {
260       /* argL: the string,  argR: charset */
261       UInt   si, ci;
262       UChar* argL    = (UChar*)argLV;
263       UChar* argR    = (UChar*)argRV;
264       UInt   boolRes = 0;
265       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
266       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
267
268       for (si = 0; si < 16; si++) {
269          if ((validL & (1 << si)) == 0)
270             // run off the end of the string.
271             break;
272          UInt m = 0;
273          for (ci = 0; ci < 16; ci++) {
274             if ((validR & (1 << ci)) == 0) break;
275             if (argR[ci] == argL[si]) { m = 1; break; }
276          }
277          boolRes |= (m << si);
278       }
279
280       // boolRes is "pre-invalidated"
281       UInt intRes1 = boolRes & 0xFFFF;
282    
283       // generate I-format output
284       pcmpXstrX_WRK_gen_output_fmt_I(
285          resV, resOSZACP,
286          intRes1, zmaskL, zmaskR, validL, pol, idx
287       );
288
289       return True;
290    }
291
292    /*----------------------------------------*/
293    /*-- substring search on byte data      --*/
294    /*----------------------------------------*/
295
296    if (agg == 3/*equal ordered, aka substring search*/
297        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
298        && !isSTRM) {
299
300       /* argL: haystack,  argR: needle */
301       UInt   ni, hi;
302       UChar* argL    = (UChar*)argLV;
303       UChar* argR    = (UChar*)argRV;
304       UInt   boolRes = 0;
305       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
306       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
307       for (hi = 0; hi < 16; hi++) {
308          if ((validL & (1 << hi)) == 0)
309             // run off the end of the haystack
310             break;
311          UInt m = 1;
312          for (ni = 0; ni < 16; ni++) {
313             if ((validR & (1 << ni)) == 0) break;
314             UInt i = ni + hi;
315             if (i >= 16) break;
316             if (argL[i] != argR[ni]) { m = 0; break; }
317          }
318          boolRes |= (m << hi);
319       }
320
321       // boolRes is "pre-invalidated"
322       UInt intRes1 = boolRes & 0xFFFF;
323
324       // generate I-format output
325       pcmpXstrX_WRK_gen_output_fmt_I(
326          resV, resOSZACP,
327          intRes1, zmaskL, zmaskR, validL, pol, idx
328       );
329
330       return True;
331    }
332
333    /*----------------------------------------*/
334    /*-- ranges, unsigned byte data         --*/
335    /*----------------------------------------*/
336
337    if (agg == 1/*ranges*/
338        && fmt == 0/*ub*/
339        && !isSTRM) {
340
341       /* argL: string,  argR: range-pairs */
342       UInt   ri, si;
343       UChar* argL    = (UChar*)argLV;
344       UChar* argR    = (UChar*)argRV;
345       UInt   boolRes = 0;
346       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
347       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
348       for (si = 0; si < 16; si++) {
349          if ((validL & (1 << si)) == 0)
350             // run off the end of the string
351             break;
352          UInt m = 0;
353          for (ri = 0; ri < 16; ri += 2) {
354             if ((validR & (3 << ri)) != (3 << ri)) break;
355             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 
356                m = 1; break;
357             }
358          }
359          boolRes |= (m << si);
360       }
361
362       // boolRes is "pre-invalidated"
363       UInt intRes1 = boolRes & 0xFFFF;
364
365       // generate I-format output
366       pcmpXstrX_WRK_gen_output_fmt_I(
367          resV, resOSZACP,
368          intRes1, zmaskL, zmaskR, validL, pol, idx
369       );
370
371       return True;
372    }
373
374    return False;
375 }
376
377
378 //////////////////////////////////////////////////////////
379 //                                                      //
380 //                       ISTRI_4A                       //
381 //                                                      //
382 //////////////////////////////////////////////////////////
383
/* Hardware reference for imm8 = 0x4A: executes a real pcmpistri on the
   two operands and returns
      (selected flag bits << 16) | (low 16 bits of RCX).
   argL is loaded into xmm2 and argR into xmm11. */
UInt h_pcmpistri_4A ( V128* argL, V128* argR )
{
   /* Copy both operands into one contiguous buffer so the asm can
      address them from a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered before pushfq and restored at the end;
         presumably this keeps the push away from locals stored below
         rsp (the red zone) -- TODO confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
      /* Capture the flags via the stack into rdx, then hand rcx and
         rdx to the output operands. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11 of the flags word, i.e. the
      positions named by SHIFT_C/P/A/Z/S/O in this file. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
405
406 UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
407 {
408    V128 resV;
409    UInt resOSZACP, resECX;
410    Bool ok
411       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
412                        zmask_from_V128(argLU),
413                        zmask_from_V128(argRU),
414                        0x4A, False/*!isSTRM*/
415         );
416    assert(ok);
417    resECX = resV.uInt[0];
418    return (resOSZACP << 16) | resECX;
419 }
420
421 void istri_4A ( void )
422 {
423    char* wot = "4A";
424    UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
425    UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
426
427    try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
428
429    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
430    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
431    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 
432    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 
433
434    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 
435    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 
436    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 
437
438    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
439    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
440    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
441    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
442
443    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
444    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 
445    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 
446
447    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
448
449    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
450    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
451    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 
452
453    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 
454    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
455    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 
456
457    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
458    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 
459    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 
460
461    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 
462    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 
463    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 
464
465    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 
466    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
467 }
468
469 //////////////////////////////////////////////////////////
470 //                                                      //
471 //                       ISTRI_3A                       //
472 //                                                      //
473 //////////////////////////////////////////////////////////
474
/* Hardware reference for imm8 = 0x3A: executes a real pcmpistri on the
   two operands and returns
      (selected flag bits << 16) | (low 16 bits of RCX).
   argL is loaded into xmm2 and argR into xmm11. */
UInt h_pcmpistri_3A ( V128* argL, V128* argR )
{
   /* Copy both operands into one contiguous buffer so the asm can
      address them from a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered before pushfq and restored at the end;
         presumably this keeps the push away from locals stored below
         rsp (the red zone) -- TODO confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
      /* Capture the flags via the stack into rdx, then hand rcx and
         rdx to the output operands. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11 of the flags word, i.e. the
      positions named by SHIFT_C/P/A/Z/S/O in this file. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
496
497 UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
498 {
499    V128 resV;
500    UInt resOSZACP, resECX;
501    Bool ok
502       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
503                        zmask_from_V128(argLU),
504                        zmask_from_V128(argRU),
505                        0x3A, False/*!isSTRM*/
506         );
507    assert(ok);
508    resECX = resV.uInt[0];
509    return (resOSZACP << 16) | resECX;
510 }
511
512 void istri_3A ( void )
513 {
514    char* wot = "3A";
515    UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
516    UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
517
518    try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
519
520    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
521    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
522    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 
523    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 
524
525    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 
526    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 
527    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 
528
529    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
530    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
531    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
532    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
533
534    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
535    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 
536    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 
537
538    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
539
540    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
541    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
542    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 
543
544    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 
545    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
546    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 
547
548    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
549    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 
550    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 
551
552    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 
553    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 
554    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 
555
556    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 
557    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
558 }
559
560
561
562 //////////////////////////////////////////////////////////
563 //                                                      //
564 //                       ISTRI_0C                       //
565 //                                                      //
566 //////////////////////////////////////////////////////////
567
568 __attribute__((noinline))
569 UInt h_pcmpistri_0C ( V128* argL, V128* argR )
570 {
571    V128 block[2];
572    memcpy(&block[0], argL, sizeof(V128));
573    memcpy(&block[1], argR, sizeof(V128));
574    ULong res = 0, flags = 0;
575    __asm__ __volatile__(
576       "movdqa    0(%2),  %%xmm2"            "\n\t"
577       "movdqa    16(%2), %%xmm11"           "\n\t"
578       "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
579       //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
580       //"movd %%xmm0, %%ecx" "\n\t"
581       "pushfq"                              "\n\t"
582       "popq      %%rdx"                     "\n\t"
583       "movq      %%rcx,  %0"                "\n\t"
584       "movq      %%rdx,  %1"                "\n\t"
585       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
586       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
587    );
588    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
589 }
590
591 UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
592 {
593    V128 resV;
594    UInt resOSZACP, resECX;
595    Bool ok
596       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
597                        zmask_from_V128(argLU),
598                        zmask_from_V128(argRU),
599                        0x0C, False/*!isSTRM*/
600         );
601    assert(ok);
602    resECX = resV.uInt[0];
603    return (resOSZACP << 16) | resECX;
604 }
605
606 void istri_0C ( void )
607 {
608    char* wot = "0C";
609    UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
610    UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
611    
612    try_istri(wot,h,s, "111111111abcde11", "00000000000abcde"); 
613
614    try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde"); 
615
616    try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde"); 
617    try_istri(wot,h,s, "11111111111abcde", "00000000000abcde"); 
618    try_istri(wot,h,s, "111111111111abcd", "00000000000abcde"); 
619
620    try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde"); 
621
622    try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde"); 
623    try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde"); 
624    try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde"); 
625    try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde"); 
626    try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde"); 
627
628    try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde"); 
629    try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde"); 
630    try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde"); 
631
632    try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde"); 
633    try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde"); 
634
635    try_istri(wot,h,s, "1111111111111234", "0000000000000000"); 
636    try_istri(wot,h,s, "1111111111111234", "0000000000000001"); 
637    try_istri(wot,h,s, "1111111111111234", "0000000000000011"); 
638
639    try_istri(wot,h,s, "1111111111111234", "1111111111111234"); 
640    try_istri(wot,h,s, "a111111111111111", "000000000000000a"); 
641    try_istri(wot,h,s, "b111111111111111", "000000000000000a"); 
642 }
643
644
645 //////////////////////////////////////////////////////////
646 //                                                      //
647 //                       ISTRI_08                       //
648 //                                                      //
649 //////////////////////////////////////////////////////////
650
/* Hardware reference for imm8 = 0x08: executes a real pcmpistri on the
   two operands and returns
      (selected flag bits << 16) | (low 16 bits of RCX).
   argL is loaded into xmm2 and argR into xmm11. */
UInt h_pcmpistri_08 ( V128* argL, V128* argR )
{
   /* Copy both operands into one contiguous buffer so the asm can
      address them from a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered before pushfq and restored at the end;
         presumably this keeps the push away from locals stored below
         rsp (the red zone) -- TODO confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
      /* Capture the flags via the stack into rdx, then hand rcx and
         rdx to the output operands. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11 of the flags word, i.e. the
      positions named by SHIFT_C/P/A/Z/S/O in this file. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
672
673 UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
674 {
675    V128 resV;
676    UInt resOSZACP, resECX;
677    Bool ok
678       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
679                        zmask_from_V128(argLU),
680                        zmask_from_V128(argRU),
681                        0x08, False/*!isSTRM*/
682         );
683    assert(ok);
684    resECX = resV.uInt[0];
685    return (resOSZACP << 16) | resECX;
686 }
687
688 void istri_08 ( void )
689 {
690    char* wot = "08";
691    UInt(*h)(V128*,V128*) = h_pcmpistri_08;
692    UInt(*s)(V128*,V128*) = s_pcmpistri_08;
693
694    try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
695
696    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
697    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
698    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 
699    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 
700
701    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 
702    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 
703    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 
704
705    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
706    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
707    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
708    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
709
710    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
711    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 
712    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 
713
714    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
715
716    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
717    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
718    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 
719
720    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 
721    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
722    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 
723
724    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
725    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 
726    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 
727
728    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 
729    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 
730    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 
731
732    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 
733    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
734 }
735
736
737
738 //////////////////////////////////////////////////////////
739 //                                                      //
740 //                       ISTRI_1A                       //
741 //                                                      //
742 //////////////////////////////////////////////////////////
743
/* Hardware reference for imm8 = 0x1A: executes a real pcmpistri on the
   two operands and returns
      (selected flag bits << 16) | (low 16 bits of RCX).
   argL is loaded into xmm2 and argR into xmm11. */
UInt h_pcmpistri_1A ( V128* argL, V128* argR )
{
   /* Copy both operands into one contiguous buffer so the asm can
      address them from a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered before pushfq and restored at the end;
         presumably this keeps the push away from locals stored below
         rsp (the red zone) -- TODO confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x1A,  %%xmm2, %%xmm11"   "\n\t"
      /* Capture the flags via the stack into rdx, then hand rcx and
         rdx to the output operands. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11 of the flags word, i.e. the
      positions named by SHIFT_C/P/A/Z/S/O in this file. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
765
766 UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
767 {
768    V128 resV;
769    UInt resOSZACP, resECX;
770    Bool ok
771       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
772                        zmask_from_V128(argLU),
773                        zmask_from_V128(argRU),
774                        0x1A, False/*!isSTRM*/
775         );
776    assert(ok);
777    resECX = resV.uInt[0];
778    return (resOSZACP << 16) | resECX;
779 }
780
781 void istri_1A ( void )
782 {
783    char* wot = "1A";
784    UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
785    UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
786
787    try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
788
789    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
790    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
791    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa"); 
792    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa"); 
793
794    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa"); 
795    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa"); 
796    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a"); 
797
798    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
799    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
800    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
801    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
802
803    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
804    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa"); 
805    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa"); 
806
807    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
808
809    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
810    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
811    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa"); 
812
813    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa"); 
814    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa"); 
815    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa"); 
816
817    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa"); 
818    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa"); 
819    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa"); 
820
821    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa"); 
822    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa"); 
823    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa"); 
824
825    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa"); 
826    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000"); 
827 }
828
829
830
831 //////////////////////////////////////////////////////////
832 //                                                      //
833 //                       ISTRI_02                       //
834 //                                                      //
835 //////////////////////////////////////////////////////////
836
837 UInt h_pcmpistri_02 ( V128* argL, V128* argR )
838 {
839    V128 block[2];
840    memcpy(&block[0], argL, sizeof(V128));
841    memcpy(&block[1], argR, sizeof(V128));
842    ULong res, flags;
843    __asm__ __volatile__(
844       "subq      $1024,  %%rsp"             "\n\t"
845       "movdqu    0(%2),  %%xmm2"            "\n\t"
846       "movdqu    16(%2), %%xmm11"           "\n\t"
847       "pcmpistri $0x02,  %%xmm2, %%xmm11"   "\n\t"
848 //"pcmpistrm $0x02, %%xmm2, %%xmm11"   "\n\t"
849 //"movd %%xmm0, %%ecx" "\n\t"
850       "pushfq"                              "\n\t"
851       "popq      %%rdx"                     "\n\t"
852       "movq      %%rcx,  %0"                "\n\t"
853       "movq      %%rdx,  %1"                "\n\t"
854       "addq      $1024,  %%rsp"             "\n\t"
855       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
856       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
857    );
858    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
859 }
860
861 UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
862 {
863    V128 resV;
864    UInt resOSZACP, resECX;
865    Bool ok
866       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
867                        zmask_from_V128(argLU),
868                        zmask_from_V128(argRU),
869                        0x02, False/*!isSTRM*/
870         );
871    assert(ok);
872    resECX = resV.uInt[0];
873    return (resOSZACP << 16) | resECX;
874 }
875
876 void istri_02 ( void )
877 {
878    char* wot = "02";
879    UInt(*h)(V128*,V128*) = h_pcmpistri_02;
880    UInt(*s)(V128*,V128*) = s_pcmpistri_02;
881
882    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 
883    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 
884    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 
885    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 
886
887    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
888    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 
889    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 
890    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 
891    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 
892
893    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
894    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 
895    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 
896    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 
897
898    try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
899    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
900
901    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 
902    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 
903    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 
904    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 
905
906    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 
907
908    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 
909    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 
910 }
911
912
913 //////////////////////////////////////////////////////////
914 //                                                      //
915 //                       ISTRI_12                       //
916 //                                                      //
917 //////////////////////////////////////////////////////////
918
919 UInt h_pcmpistri_12 ( V128* argL, V128* argR )
920 {
921    V128 block[2];
922    memcpy(&block[0], argL, sizeof(V128));
923    memcpy(&block[1], argR, sizeof(V128));
924    ULong res, flags;
925    __asm__ __volatile__(
926       "subq      $1024,  %%rsp"             "\n\t"
927       "movdqu    0(%2),  %%xmm2"            "\n\t"
928       "movdqu    16(%2), %%xmm11"           "\n\t"
929       "pcmpistri $0x12,  %%xmm2, %%xmm11"   "\n\t"
930 //"pcmpistrm $0x12, %%xmm2, %%xmm11"   "\n\t"
931 //"movd %%xmm0, %%ecx" "\n\t"
932       "pushfq"                              "\n\t"
933       "popq      %%rdx"                     "\n\t"
934       "movq      %%rcx,  %0"                "\n\t"
935       "movq      %%rdx,  %1"                "\n\t"
936       "addq      $1024,  %%rsp"             "\n\t"
937       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
938       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
939    );
940    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
941 }
942
943 UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
944 {
945    V128 resV;
946    UInt resOSZACP, resECX;
947    Bool ok
948       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
949                        zmask_from_V128(argLU),
950                        zmask_from_V128(argRU),
951                        0x12, False/*!isSTRM*/
952         );
953    assert(ok);
954    resECX = resV.uInt[0];
955    return (resOSZACP << 16) | resECX;
956 }
957
958 void istri_12 ( void )
959 {
960    char* wot = "12";
961    UInt(*h)(V128*,V128*) = h_pcmpistri_12;
962    UInt(*s)(V128*,V128*) = s_pcmpistri_12;
963
964    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 
965    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 
966    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 
967    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 
968
969    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
970    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 
971    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 
972    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 
973    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 
974
975    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
976    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 
977    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 
978    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 
979
980    try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
981    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
982
983    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 
984    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 
985    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 
986    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 
987
988    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 
989
990    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 
991    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 
992 }
993
994
995
996 //////////////////////////////////////////////////////////
997 //                                                      //
998 //                       ISTRI_44                       //
999 //                                                      //
1000 //////////////////////////////////////////////////////////
1001
1002 UInt h_pcmpistri_44 ( V128* argL, V128* argR )
1003 {
1004    V128 block[2];
1005    memcpy(&block[0], argL, sizeof(V128));
1006    memcpy(&block[1], argR, sizeof(V128));
1007    ULong res, flags;
1008    __asm__ __volatile__(
1009       "subq      $1024,  %%rsp"             "\n\t"
1010       "movdqu    0(%2),  %%xmm2"            "\n\t"
1011       "movdqu    16(%2), %%xmm11"           "\n\t"
1012       "pcmpistri $0x44,  %%xmm2, %%xmm11"   "\n\t"
1013 //"pcmpistrm $0x04, %%xmm2, %%xmm11"   "\n\t"
1014 //"movd %%xmm0, %%ecx" "\n\t"
1015       "pushfq"                              "\n\t"
1016       "popq      %%rdx"                     "\n\t"
1017       "movq      %%rcx,  %0"                "\n\t"
1018       "movq      %%rdx,  %1"                "\n\t"
1019       "addq      $1024,  %%rsp"             "\n\t"
1020       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1021       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1022    );
1023    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1024 }
1025
1026 UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1027 {
1028    V128 resV;
1029    UInt resOSZACP, resECX;
1030    Bool ok
1031       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1032                        zmask_from_V128(argLU),
1033                        zmask_from_V128(argRU),
1034                        0x44, False/*!isSTRM*/
1035         );
1036    assert(ok);
1037    resECX = resV.uInt[0];
1038    return (resOSZACP << 16) | resECX;
1039 }
1040
1041 void istri_44 ( void )
1042 {
1043    char* wot = "44";
1044    UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1045    UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1046
1047    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc"); 
1048    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb"); 
1049    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb"); 
1050    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb"); 
1051
1052    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 
1053    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb"); 
1054    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb"); 
1055    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb"); 
1056    try_istri(wot,h,s, "0000000000000000", "00000000000000cb"); 
1057
1058    try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
1059
1060    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb"); 
1061    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b"); 
1062    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb"); 
1063
1064    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb"); 
1065    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb"); 
1066    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1067
1068    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1069    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1070
1071    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1072    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1073 }
1074
1075
1076 //////////////////////////////////////////////////////////
1077 //                                                      //
1078 //                       ISTRI_00                       //
1079 //                                                      //
1080 //////////////////////////////////////////////////////////
1081
1082 UInt h_pcmpistri_00 ( V128* argL, V128* argR )
1083 {
1084    V128 block[2];
1085    memcpy(&block[0], argL, sizeof(V128));
1086    memcpy(&block[1], argR, sizeof(V128));
1087    ULong res, flags;
1088    __asm__ __volatile__(
1089       "subq      $1024,  %%rsp"             "\n\t"
1090       "movdqu    0(%2),  %%xmm2"            "\n\t"
1091       "movdqu    16(%2), %%xmm11"           "\n\t"
1092       "pcmpistri $0x00,  %%xmm2, %%xmm11"   "\n\t"
1093 //"pcmpistrm $0x00, %%xmm2, %%xmm11"   "\n\t"
1094 //"movd %%xmm0, %%ecx" "\n\t"
1095       "pushfq"                              "\n\t"
1096       "popq      %%rdx"                     "\n\t"
1097       "movq      %%rcx,  %0"                "\n\t"
1098       "movq      %%rdx,  %1"                "\n\t"
1099       "addq      $1024,  %%rsp"             "\n\t"
1100       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1101       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1102    );
1103    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1104 }
1105
1106 UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1107 {
1108    V128 resV;
1109    UInt resOSZACP, resECX;
1110    Bool ok
1111       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1112                        zmask_from_V128(argLU),
1113                        zmask_from_V128(argRU),
1114                        0x00, False/*!isSTRM*/
1115         );
1116    assert(ok);
1117    resECX = resV.uInt[0];
1118    return (resOSZACP << 16) | resECX;
1119 }
1120
1121 void istri_00 ( void )
1122 {
1123    char* wot = "00";
1124    UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1125    UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1126
1127    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a"); 
1128    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b"); 
1129    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab"); 
1130    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd"); 
1131
1132    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
1133    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd"); 
1134    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd"); 
1135    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd"); 
1136    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd"); 
1137
1138    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd"); 
1139    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd"); 
1140    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d"); 
1141    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0"); 
1142
1143    try_istri(wot,h,s, "0000000000000000", "0000000000000000"); 
1144    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa"); 
1145
1146    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd"); 
1147    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba"); 
1148    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb"); 
1149    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba"); 
1150
1151    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0"); 
1152
1153    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe"); 
1154    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe"); 
1155 }
1156
1157
1158
1159 //////////////////////////////////////////////////////////
1160 //                                                      //
1161 //                         main                         //
1162 //                                                      //
1163 //////////////////////////////////////////////////////////
1164
1165 int main ( void )
1166 {
1167    istri_4A();
1168    istri_3A();
1169    istri_08();
1170    istri_1A();
1171    istri_02();
1172    istri_0C();
1173    istri_12();
1174    istri_44();
1175    istri_00();
1176    return 0;
1177 }