2 /* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3 pcmpistri to drive it. Does not check the e-vs-i or i-vs-m
10 typedef unsigned int UInt;
11 typedef signed int Int;
12 typedef unsigned char UChar;
13 typedef unsigned long long int ULong;
15 #define False ((Bool)0)
16 #define True ((Bool)1)
18 //typedef unsigned char V128[16];
33 #define MASK_O (1ULL << SHIFT_O)
34 #define MASK_S (1ULL << SHIFT_S)
35 #define MASK_Z (1ULL << SHIFT_Z)
36 #define MASK_A (1ULL << SHIFT_A)
37 #define MASK_C (1ULL << SHIFT_C)
38 #define MASK_P (1ULL << SHIFT_P)
67 return 32 - clz32((~x) & (x-1));
70 void expand ( V128* dst, char* summary )
73 assert( strlen(summary) == 16 );
74 for (i = 0; i < 16; i++) {
76 UChar x = summary[15-i];
77 if (x >= '0' && x <= '9') { xx = x - '0'; }
78 else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
79 else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
89 void try_istri ( char* which,
90 UInt(*h_fn)(V128*,V128*),
91 UInt(*s_fn)(V128*,V128*),
92 char* summL, char* summR )
94 assert(strlen(which) == 2);
98 UInt h_res = h_fn(&argL, &argR);
99 UInt s_res = s_fn(&argL, &argR);
100 printf("istri %s %s %s -> %08x %08x %s\n",
101 which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
104 UInt zmask_from_V128 ( V128* arg )
107 for (i = 0; i < 16; i++) {
108 res |= ((arg->uChar[i] == 0) ? 1 : 0) << i;
113 //////////////////////////////////////////////////////////
117 //////////////////////////////////////////////////////////
120 /* Given partial results from a pcmpXstrX operation (intRes1,
121 basically), generate an I format (index value for ECX) output, and
122 also the new OSZACP flags.
125 void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
126 /*OUT*/UInt* resOSZACP,
128 UInt zmaskL, UInt zmaskR,
132 assert((pol >> 2) == 0);
133 assert((idx >> 1) == 0);
137 case 0: intRes2 = intRes1; break; // pol +
138 case 1: intRes2 = ~intRes1; break; // pol -
139 case 2: intRes2 = intRes1; break; // pol m+
140 case 3: intRes2 = intRes1 ^ validL; break; // pol m-
144 // generate ecx value
148 newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
151 newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
154 *(UInt*)(&resV[0]) = newECX;
156 // generate new flags, common to all ISTRI and ISTRM cases
157 *resOSZACP // A, P are zero
158 = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
159 | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
160 | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
161 | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
165 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
168 For xSTRI variants, the new ECX value is placed in the 32 bits
169 pointed to by *resV. For xSTRM variants, the result is a 128 bit
170 value and is placed at *resV in the obvious way.
172 For all variants, the new OSZACP value is placed at *resOSZACP.
174 argLV and argRV are the vector args. The caller must prepare a
175 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
176 must be 1 for each zero byte of the respective arg. For ESTRx
177 variants this is derived from the explicit length indication, and
178 must be 0 in all places except at the bit index corresponding to
179 the valid length (0 .. 16). If the valid length is 16 then the
180 mask must be all zeroes. In all cases, bits 31:16 must be zero.
182 imm8 is the original immediate from the instruction. isSTRM
183 indicates whether this is a xSTRM or xSTRI variant, which controls
184 how much of *resV is written.
186 If the given imm8 case can be handled, the return value is True.
187 If not, False is returned, and neither *resV nor *resOSZACP are
191 Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
192 /*OUT*/UInt* resOSZACP,
193 V128* argLV, V128* argRV,
194 UInt zmaskL, UInt zmaskR,
195 UInt imm8, Bool isSTRM )
198 assert((zmaskL >> 16) == 0);
199 assert((zmaskR >> 16) == 0);
201 /* Explicitly reject any imm8 values that haven't been validated,
202 even if they would probably work. Life is too short to have
203 unvalidated cases in the code base. */
206 case 0x02: case 0x08: case 0x0C: case 0x12: case 0x1A:
207 case 0x3A: case 0x44: case 0x4A:
213 UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
214 UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
215 UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
216 UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
218 /*----------------------------------------*/
219 /*-- strcmp on byte data --*/
220 /*----------------------------------------*/
222 if (agg == 2/*equal each, aka strcmp*/
223 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
226 UChar* argL = (UChar*)argLV;
227 UChar* argR = (UChar*)argRV;
229 for (i = 15; i >= 0; i--) {
232 boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
234 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
235 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
237 // do invalidation, common to all equal-each cases
239 = (boolResII & validL & validR) // if both valid, use cmpres
240 | (~ (validL | validR)); // if both invalid, force 1
244 // generate I-format output
245 pcmpXstrX_WRK_gen_output_fmt_I(
247 intRes1, zmaskL, zmaskR, validL, pol, idx
253 /*----------------------------------------*/
254 /*-- set membership on byte data --*/
255 /*----------------------------------------*/
257 if (agg == 0/*equal any, aka find chars in a set*/
258 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
260 /* argL: the string, argR: charset */
262 UChar* argL = (UChar*)argLV;
263 UChar* argR = (UChar*)argRV;
265 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
266 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
268 for (si = 0; si < 16; si++) {
269 if ((validL & (1 << si)) == 0)
270 // run off the end of the string.
273 for (ci = 0; ci < 16; ci++) {
274 if ((validR & (1 << ci)) == 0) break;
275 if (argR[ci] == argL[si]) { m = 1; break; }
277 boolRes |= (m << si);
280 // boolRes is "pre-invalidated"
281 UInt intRes1 = boolRes & 0xFFFF;
283 // generate I-format output
284 pcmpXstrX_WRK_gen_output_fmt_I(
286 intRes1, zmaskL, zmaskR, validL, pol, idx
292 /*----------------------------------------*/
293 /*-- substring search on byte data --*/
294 /*----------------------------------------*/
296 if (agg == 3/*equal ordered, aka substring search*/
297 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
300 /* argL: haystack, argR: needle */
302 UChar* argL = (UChar*)argLV;
303 UChar* argR = (UChar*)argRV;
305 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
306 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
307 for (hi = 0; hi < 16; hi++) {
308 if ((validL & (1 << hi)) == 0)
309 // run off the end of the haystack
312 for (ni = 0; ni < 16; ni++) {
313 if ((validR & (1 << ni)) == 0) break;
316 if (argL[i] != argR[ni]) { m = 0; break; }
318 boolRes |= (m << hi);
321 // boolRes is "pre-invalidated"
322 UInt intRes1 = boolRes & 0xFFFF;
324 // generate I-format output
325 pcmpXstrX_WRK_gen_output_fmt_I(
327 intRes1, zmaskL, zmaskR, validL, pol, idx
333 /*----------------------------------------*/
334 /*-- ranges, unsigned byte data --*/
335 /*----------------------------------------*/
337 if (agg == 1/*ranges*/
341 /* argL: string, argR: range-pairs */
343 UChar* argL = (UChar*)argLV;
344 UChar* argR = (UChar*)argRV;
346 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
347 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
348 for (si = 0; si < 16; si++) {
349 if ((validL & (1 << si)) == 0)
350 // run off the end of the string
353 for (ri = 0; ri < 16; ri += 2) {
354 if ((validR & (3 << ri)) != (3 << ri)) break;
355 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
359 boolRes |= (m << si);
362 // boolRes is "pre-invalidated"
363 UInt intRes1 = boolRes & 0xFFFF;
365 // generate I-format output
366 pcmpXstrX_WRK_gen_output_fmt_I(
368 intRes1, zmaskL, zmaskR, validL, pol, idx
378 //////////////////////////////////////////////////////////
382 //////////////////////////////////////////////////////////
384 UInt h_pcmpistri_4A ( V128* argL, V128* argR )
387 memcpy(&block[0], argL, sizeof(V128));
388 memcpy(&block[1], argR, sizeof(V128));
390 __asm__ __volatile__(
391 "subq $1024, %%rsp" "\n\t"
392 "movdqu 0(%2), %%xmm2" "\n\t"
393 "movdqu 16(%2), %%xmm11" "\n\t"
394 "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t"
397 "movq %%rcx, %0" "\n\t"
398 "movq %%rdx, %1" "\n\t"
399 "addq $1024, %%rsp" "\n\t"
400 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
401 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
403 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
406 UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
409 UInt resOSZACP, resECX;
411 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
412 zmask_from_V128(argLU),
413 zmask_from_V128(argRU),
414 0x4A, False/*!isSTRM*/
417 resECX = resV.uInt[0];
418 return (resOSZACP << 16) | resECX;
421 void istri_4A ( void )
424 UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
425 UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
427 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
429 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
430 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
431 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
432 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
434 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
435 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
436 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
438 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
439 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
440 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
441 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
443 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
444 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
445 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
447 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
449 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
450 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
451 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
453 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
454 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
455 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
457 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
458 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
459 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
461 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
462 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
463 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
465 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
466 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
469 //////////////////////////////////////////////////////////
473 //////////////////////////////////////////////////////////
475 UInt h_pcmpistri_3A ( V128* argL, V128* argR )
478 memcpy(&block[0], argL, sizeof(V128));
479 memcpy(&block[1], argR, sizeof(V128));
481 __asm__ __volatile__(
482 "subq $1024, %%rsp" "\n\t"
483 "movdqu 0(%2), %%xmm2" "\n\t"
484 "movdqu 16(%2), %%xmm11" "\n\t"
485 "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t"
488 "movq %%rcx, %0" "\n\t"
489 "movq %%rdx, %1" "\n\t"
490 "addq $1024, %%rsp" "\n\t"
491 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
492 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
494 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
497 UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
500 UInt resOSZACP, resECX;
502 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
503 zmask_from_V128(argLU),
504 zmask_from_V128(argRU),
505 0x3A, False/*!isSTRM*/
508 resECX = resV.uInt[0];
509 return (resOSZACP << 16) | resECX;
512 void istri_3A ( void )
515 UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
516 UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
518 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
520 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
521 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
522 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
523 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
525 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
526 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
527 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
529 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
530 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
531 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
532 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
534 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
535 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
536 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
538 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
540 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
541 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
542 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
544 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
545 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
546 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
548 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
549 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
550 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
552 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
553 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
554 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
556 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
557 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
562 //////////////////////////////////////////////////////////
566 //////////////////////////////////////////////////////////
568 __attribute__((noinline))
569 UInt h_pcmpistri_0C ( V128* argL, V128* argR )
572 memcpy(&block[0], argL, sizeof(V128));
573 memcpy(&block[1], argR, sizeof(V128));
574 ULong res = 0, flags = 0;
575 __asm__ __volatile__(
576 "movdqa 0(%2), %%xmm2" "\n\t"
577 "movdqa 16(%2), %%xmm11" "\n\t"
578 "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t"
579 //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t"
580 //"movd %%xmm0, %%ecx" "\n\t"
583 "movq %%rcx, %0" "\n\t"
584 "movq %%rdx, %1" "\n\t"
585 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
586 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
588 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
591 UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
594 UInt resOSZACP, resECX;
596 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
597 zmask_from_V128(argLU),
598 zmask_from_V128(argRU),
599 0x0C, False/*!isSTRM*/
602 resECX = resV.uInt[0];
603 return (resOSZACP << 16) | resECX;
606 void istri_0C ( void )
609 UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
610 UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
612 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
614 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
616 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
617 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
618 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
620 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
622 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
623 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
624 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
625 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
626 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
628 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
629 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
630 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
632 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
633 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
635 try_istri(wot,h,s, "1111111111111234", "0000000000000000");
636 try_istri(wot,h,s, "1111111111111234", "0000000000000001");
637 try_istri(wot,h,s, "1111111111111234", "0000000000000011");
639 try_istri(wot,h,s, "1111111111111234", "1111111111111234");
640 try_istri(wot,h,s, "a111111111111111", "000000000000000a");
641 try_istri(wot,h,s, "b111111111111111", "000000000000000a");
645 //////////////////////////////////////////////////////////
649 //////////////////////////////////////////////////////////
651 UInt h_pcmpistri_08 ( V128* argL, V128* argR )
654 memcpy(&block[0], argL, sizeof(V128));
655 memcpy(&block[1], argR, sizeof(V128));
657 __asm__ __volatile__(
658 "subq $1024, %%rsp" "\n\t"
659 "movdqu 0(%2), %%xmm2" "\n\t"
660 "movdqu 16(%2), %%xmm11" "\n\t"
661 "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t"
664 "movq %%rcx, %0" "\n\t"
665 "movq %%rdx, %1" "\n\t"
666 "addq $1024, %%rsp" "\n\t"
667 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
668 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
670 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
673 UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
676 UInt resOSZACP, resECX;
678 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
679 zmask_from_V128(argLU),
680 zmask_from_V128(argRU),
681 0x08, False/*!isSTRM*/
684 resECX = resV.uInt[0];
685 return (resOSZACP << 16) | resECX;
688 void istri_08 ( void )
691 UInt(*h)(V128*,V128*) = h_pcmpistri_08;
692 UInt(*s)(V128*,V128*) = s_pcmpistri_08;
694 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
696 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
697 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
698 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
699 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
701 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
702 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
703 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
705 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
706 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
707 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
708 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
710 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
711 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
712 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
714 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
716 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
717 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
718 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
720 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
721 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
722 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
724 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
725 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
726 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
728 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
729 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
730 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
732 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
733 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
738 //////////////////////////////////////////////////////////
742 //////////////////////////////////////////////////////////
744 UInt h_pcmpistri_1A ( V128* argL, V128* argR )
747 memcpy(&block[0], argL, sizeof(V128));
748 memcpy(&block[1], argR, sizeof(V128));
750 __asm__ __volatile__(
751 "subq $1024, %%rsp" "\n\t"
752 "movdqu 0(%2), %%xmm2" "\n\t"
753 "movdqu 16(%2), %%xmm11" "\n\t"
754 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t"
757 "movq %%rcx, %0" "\n\t"
758 "movq %%rdx, %1" "\n\t"
759 "addq $1024, %%rsp" "\n\t"
760 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
761 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
763 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
766 UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
769 UInt resOSZACP, resECX;
771 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
772 zmask_from_V128(argLU),
773 zmask_from_V128(argRU),
774 0x1A, False/*!isSTRM*/
777 resECX = resV.uInt[0];
778 return (resOSZACP << 16) | resECX;
781 void istri_1A ( void )
784 UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
785 UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
787 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
789 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
790 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
791 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
792 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
794 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
795 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
796 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
798 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
799 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
800 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
801 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
803 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
804 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
805 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
807 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
809 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
810 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
811 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
813 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
814 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
815 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
817 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
818 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
819 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
821 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
822 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
823 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
825 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
826 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
831 //////////////////////////////////////////////////////////
835 //////////////////////////////////////////////////////////
837 UInt h_pcmpistri_02 ( V128* argL, V128* argR )
840 memcpy(&block[0], argL, sizeof(V128));
841 memcpy(&block[1], argR, sizeof(V128));
843 __asm__ __volatile__(
844 "subq $1024, %%rsp" "\n\t"
845 "movdqu 0(%2), %%xmm2" "\n\t"
846 "movdqu 16(%2), %%xmm11" "\n\t"
847 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t"
848 //"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t"
849 //"movd %%xmm0, %%ecx" "\n\t"
852 "movq %%rcx, %0" "\n\t"
853 "movq %%rdx, %1" "\n\t"
854 "addq $1024, %%rsp" "\n\t"
855 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
856 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
858 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
861 UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
864 UInt resOSZACP, resECX;
866 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
867 zmask_from_V128(argLU),
868 zmask_from_V128(argRU),
869 0x02, False/*!isSTRM*/
872 resECX = resV.uInt[0];
873 return (resOSZACP << 16) | resECX;
876 void istri_02 ( void )
879 UInt(*h)(V128*,V128*) = h_pcmpistri_02;
880 UInt(*s)(V128*,V128*) = s_pcmpistri_02;
882 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
883 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
884 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
885 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
887 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
888 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
889 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
890 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
891 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
893 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
894 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
895 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
896 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
898 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
899 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
901 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
902 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
903 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
904 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
906 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
908 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
909 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
913 //////////////////////////////////////////////////////////
917 //////////////////////////////////////////////////////////
919 UInt h_pcmpistri_12 ( V128* argL, V128* argR )
922 memcpy(&block[0], argL, sizeof(V128));
923 memcpy(&block[1], argR, sizeof(V128));
925 __asm__ __volatile__(
926 "subq $1024, %%rsp" "\n\t"
927 "movdqu 0(%2), %%xmm2" "\n\t"
928 "movdqu 16(%2), %%xmm11" "\n\t"
929 "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t"
930 //"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t"
931 //"movd %%xmm0, %%ecx" "\n\t"
934 "movq %%rcx, %0" "\n\t"
935 "movq %%rdx, %1" "\n\t"
936 "addq $1024, %%rsp" "\n\t"
937 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
938 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
940 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
943 UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
946 UInt resOSZACP, resECX;
948 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
949 zmask_from_V128(argLU),
950 zmask_from_V128(argRU),
951 0x12, False/*!isSTRM*/
954 resECX = resV.uInt[0];
955 return (resOSZACP << 16) | resECX;
958 void istri_12 ( void )
961 UInt(*h)(V128*,V128*) = h_pcmpistri_12;
962 UInt(*s)(V128*,V128*) = s_pcmpistri_12;
964 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
965 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
966 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
967 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
969 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
970 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
971 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
972 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
973 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
975 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
976 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
977 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
978 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
980 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
981 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
983 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
984 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
985 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
986 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
988 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
990 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
991 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
996 //////////////////////////////////////////////////////////
1000 //////////////////////////////////////////////////////////
1002 UInt h_pcmpistri_44 ( V128* argL, V128* argR )
1005 memcpy(&block[0], argL, sizeof(V128));
1006 memcpy(&block[1], argR, sizeof(V128));
1008 __asm__ __volatile__(
1009 "subq $1024, %%rsp" "\n\t"
1010 "movdqu 0(%2), %%xmm2" "\n\t"
1011 "movdqu 16(%2), %%xmm11" "\n\t"
1012 "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t"
1013 //"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t"
1014 //"movd %%xmm0, %%ecx" "\n\t"
1017 "movq %%rcx, %0" "\n\t"
1018 "movq %%rdx, %1" "\n\t"
1019 "addq $1024, %%rsp" "\n\t"
1020 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1021 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1023 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1026 UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1029 UInt resOSZACP, resECX;
1031 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1032 zmask_from_V128(argLU),
1033 zmask_from_V128(argRU),
1034 0x44, False/*!isSTRM*/
1037 resECX = resV.uInt[0];
1038 return (resOSZACP << 16) | resECX;
1041 void istri_44 ( void )
1044 UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1045 UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1047 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1048 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1049 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1050 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1052 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1053 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1054 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1055 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1056 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1058 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1060 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1061 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1062 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1064 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1065 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1066 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1068 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1069 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1071 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1072 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1076 //////////////////////////////////////////////////////////
1080 //////////////////////////////////////////////////////////
1082 UInt h_pcmpistri_00 ( V128* argL, V128* argR )
1085 memcpy(&block[0], argL, sizeof(V128));
1086 memcpy(&block[1], argR, sizeof(V128));
1088 __asm__ __volatile__(
1089 "subq $1024, %%rsp" "\n\t"
1090 "movdqu 0(%2), %%xmm2" "\n\t"
1091 "movdqu 16(%2), %%xmm11" "\n\t"
1092 "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t"
1093 //"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t"
1094 //"movd %%xmm0, %%ecx" "\n\t"
1097 "movq %%rcx, %0" "\n\t"
1098 "movq %%rdx, %1" "\n\t"
1099 "addq $1024, %%rsp" "\n\t"
1100 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1101 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1103 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1106 UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1109 UInt resOSZACP, resECX;
1111 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1112 zmask_from_V128(argLU),
1113 zmask_from_V128(argRU),
1114 0x00, False/*!isSTRM*/
1117 resECX = resV.uInt[0];
1118 return (resOSZACP << 16) | resECX;
1121 void istri_00 ( void )
1124 UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1125 UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1127 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1128 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1129 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1130 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1132 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1133 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1134 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1135 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1136 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1138 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1139 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1140 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1141 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1143 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1144 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1146 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1147 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1148 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1149 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1151 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1153 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1154 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1159 //////////////////////////////////////////////////////////
1163 //////////////////////////////////////////////////////////