2 /*--------------------------------------------------------------------*/
3 /*--- The core dispatch loop, for jumping to a code address. ---*/
4 /*--- dispatch-ppc64-aix5.S ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2006-2010 OpenWorks LLP
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
31 Neither the names of the U.S. Department of Energy nor the
32 University of California nor the names of its contributors may be
33 used to endorse or promote products derived from this software
34 without prior written permission.
37 #if defined(VGP_ppc64_aix5)
39 #include "pub_core_basics_asm.h"
40 #include "pub_core_dispatch_asm.h"
41 #include "pub_core_transtab_asm.h"
42 #include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */
45 /*------------------------------------------------------------*/
47 /*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
48 /*--- run all translations except no-redir ones. ---*/
50 /*------------------------------------------------------------*/
52 /*----------------------------------------------------*/
53 /*--- Incomprehensible TOC mumbo-jumbo nonsense. ---*/
54 /*----------------------------------------------------*/
56 /* No, I don't have a clue either. I just compiled a bit of
57 C with gcc and copied the assembly code it produced. */
59 /* Basically "ld rd, tocent__foo(2)" gets &foo into rd. */
61 .file "dispatch-ppc64-aix5.S"
/* TOC entries. Each tocent__X label marks a TOC slot holding the address
   of X, so "ld rd, tocent__X(2)" loads &X into rd. */
66 tocent__vgPlain_dispatch_ctr:
67 .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr[RW]
68 tocent__vgPlain_machine_ppc64_has_VMX:
69 .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX[RW]
70 tocent__vgPlain_tt_fast:
71 .tc vgPlain_tt_fast[TC],vgPlain_tt_fast[RW]
72 tocent__vgPlain_tt_fastN:
/* The entry is named after the symbol it holds, like the entries above,
   instead of reusing the vgPlain_tt_fast[TC] name. */
73 .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN[RW]
/* Function descriptor for VG_(run_innerloop). vgPlain_run_innerloop names
   the descriptor and .vgPlain_run_innerloop names the code entry point. */
76 .globl vgPlain_run_innerloop
77 .globl .vgPlain_run_innerloop
78 .csect vgPlain_run_innerloop[DS]
79 vgPlain_run_innerloop:
/* Three doublewords: code entry address, TOC anchor, and zero. */
80 .llong .vgPlain_run_innerloop, TOC[tc0], 0
83 /*----------------------------------------------------*/
84 /*--- Preamble (set everything up) ---*/
85 /*----------------------------------------------------*/
88 UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
/* Code entry point for VG_(run_innerloop). Builds a 2048-byte frame,
   keeps the guest state pointer in r31 and the 32-bit dispatch counter
   in r29, then branches into one of the two dispatch loops. */
90 .vgPlain_run_innerloop:
92 /* r3 holds guest_state */
93 /* r4 holds do_profiling */
94 /* Rather than attempt to make sense of the AIX ABI, just
95 drop r1 by 512 (to get away from the caller's frame), then
96 1024 (to give ourselves a 1024-byte save area), and then
97 another 512 (to clear our save area). In all, drop r1 by 2048
98 and dump stuff on the stack at 512(1)..1536(1). */
100 /* ----- entry point to VG_(run_innerloop) ----- */
101 /* For AIX/ppc64 we do: LR-> +16(parent_sp), CR-> +8(parent_sp) */
109 /* New stack frame */
110 stdu 1,-2048(1) /* sp should maintain 16-byte alignment */
112 /* Save callee-saved registers... */
113 /* r3, r4 are live here, so use r5 */
115 /* Floating-point reg save area : 144 bytes at r1[256+256..256+399] */
135 /* General reg save area : 76 bytes at r1[256+400 .. 256+543] */
/* NOTE(review): the range quoted above spans 144 bytes, not 76.
   Confirm the size against the actual register stores. */
154 /* Probably not necessary to save r13 (thread-specific ptr),
155 as VEX stays clear of it... but what the hell. */
158 /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
159 The Linux kernel might not actually use VRSAVE for its intended
160 purpose, but it should be harmless to preserve anyway. */
161 /* r3, r4 are live here, so use r5 */
/* r5 = &VG_(machine_ppc64_has_VMX), fetched from its TOC slot */
162 ld 5,tocent__vgPlain_machine_ppc64_has_VMX(2)
167 // Sigh. AIX 5.2 has no idea that Altivec exists.
168 // /* VRSAVE save word : 4 bytes at r1[476 .. 479] */
169 // mfspr 5,256 /* vrsave reg is spr number 256 */
172 // /* Vector reg save area (quadword aligned):
173 // 192 bytes at r1[480 .. 671] */
200 /* Local variable space... */
201 /* Put the original guest state pointer at r1[256]. We
202 will need to refer to it each time round the dispatch loop.
203 Apart from that, we can use r1[0 .. 255] and r1[264 .. 511]
206 /* r3 holds guest_state */
207 /* r4 holds do_profiling */
208 mr 31,3 /* r31 (generated code gsp) = r3 */
209 std 3,256(1) /* stash orig guest_state ptr */
211 /* hold dispatch_ctr (NOTE: 32-bit value) in r29 */
212 ld 5,tocent__vgPlain_dispatch_ctr(2)
213 lwz 29,0(5) /* 32-bit zero-extending load */
215 /* set host FPU control word to the default mode expected
216 by VEX-generated code. See comments in libvex.h for
218 /* get zero into f3 (tedious) */
219 /* note: fsub 3,3,3 is not a reliable way to do this,
220 since if f3 holds a NaN or similar then we don't necessarily
221 wind up with zero. */
223 std 5,128(1) /* r1[128] is scratch */
225 mtfsf 0xFF,3 /* fpscr = f3 */
227 /* set host AltiVec control word to the default mode expected
228 by VEX-generated code. */
229 ld 5,tocent__vgPlain_machine_ppc64_has_VMX(2)
234 // Sigh. AIX 5.2 has no idea that Altivec exists.
235 // vspltisw 3,0x0 /* generate zero */
239 /* fetch %CIA into r3 */
240 ld 3,OFFSET_ppc64_CIA(31)
242 /* fall into main loop (the right one) */
243 /* r4 = do_profiling. It's probably trashed after here,
244 but that's OK: we don't need it after here. */
/* beq takes the unprofiled loop, otherwise the profiled loop is entered.
   NOTE(review): presumably the condition comes from a test of r4
   (do_profiling) -- confirm. */
246 beq VG_(run_innerloop__dispatch_unprofiled)
247 b VG_(run_innerloop__dispatch_profiled)
250 /*----------------------------------------------------*/
251 /*--- NO-PROFILING (standard) dispatcher ---*/
252 /*----------------------------------------------------*/
/* Unprofiled dispatch loop. r3 carries the next guest address, which is
   hashed into VG_(tt_fast) to find a translation. Control comes back to
   this label after each translation returns. */
254 .globl VG_(run_innerloop__dispatch_unprofiled)
255 VG_(run_innerloop__dispatch_unprofiled):
256 /* At entry: Live regs:
258 r3 (=CIA = next guest address)
262 256(r1) (=orig guest_state)
265 /* Has the guest state pointer been messed with? If yes, exit. */
266 ld 5,256(1) /* original guest_state ptr */
268 ld 5,tocent__vgPlain_tt_fast(2) /* &VG_(tt_fast) */
271 /* save the jump address in the guest state */
272 std 3,OFFSET_ppc64_CIA(31)
274 /* Are we out of timeslice? If yes, defer to scheduler. */
276 cmplwi 29,0 /* yes, lwi - is 32-bit */
279 /* try a fast lookup in the translation cache */
280 /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
281 = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
/* rldicl rotates r3 left by 62 (a right shift by 2) and keeps only the
   low VG_TT_FAST_BITS bits. */
282 rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */
283 sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */
284 add 5,5,4 /* &VG_(tt_fast)[entry#] */
285 ld 6,0(5) /* .guest */
286 ld 7,8(5) /* .host */
/* Not-equal leaves through the fast_lookup_failed exit. */
288 bne fast_lookup_failed
290 /* Found a match. Call .host. */
294 /* On return from guest code:
295 r3 holds destination (original) address.
296 r31 may be unchanged (guest_state), or may indicate further
297 details of the control transfer requested to *r3.
300 b VG_(run_innerloop__dispatch_unprofiled)
303 /*----------------------------------------------------*/
304 /*--- PROFILING dispatcher (can be much slower) ---*/
305 /*----------------------------------------------------*/
/* Profiled dispatch loop. Performs the same VG_(tt_fast) lookup on the
   guest address in r3, and additionally fetches the matching entry of
   VG_(tt_fastN) for the basic-block profile counter. */
307 .globl VG_(run_innerloop__dispatch_profiled)
308 VG_(run_innerloop__dispatch_profiled):
309 /* At entry: Live regs:
311 r3 (=CIA = next guest address)
315 256(r1) (=orig guest_state)
318 /* Has the guest state pointer been messed with? If yes, exit. */
319 ld 5,256(1) /* original guest_state ptr */
321 ld 5,tocent__vgPlain_tt_fast(2) /* &VG_(tt_fast) */
324 /* save the jump address in the guest state */
325 std 3,OFFSET_ppc64_CIA(31)
327 /* Are we out of timeslice? If yes, defer to scheduler. */
329 cmplwi 29,0 /* yes, lwi - is 32-bit */
332 /* try a fast lookup in the translation cache */
333 /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
334 = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
/* rldicl rotates r3 left by 62 (a right shift by 2) and keeps only the
   low VG_TT_FAST_BITS bits. */
335 rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */
336 sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */
337 add 5,5,4 /* &VG_(tt_fast)[entry#] */
338 ld 6,0(5) /* .guest */
339 ld 7,8(5) /* .host */
/* Not-equal leaves through the fast_lookup_failed exit. */
341 bne fast_lookup_failed
343 /* increment bb profile counter */
344 ld 9,tocent__vgPlain_tt_fastN(2) /* r9 = &tt_fastN */
/* r4 held entry * 16, so halving it gives an 8-byte stride. */
345 srdi 4,4,1 /* entry# * sizeof(UInt*) */
346 ldx 8,9,4 /* r8 = tt_fastN[r4] */
351 /* Found a match. Call .host. */
355 /* On return from guest code:
356 r3 holds destination (original) address.
357 r31 may be unchanged (guest_state), or may indicate further
358 details of the control transfer requested to *r3.
361 b VG_(run_innerloop__dispatch_profiled)
364 /*----------------------------------------------------*/
365 /*--- exit points ---*/
366 /*----------------------------------------------------*/
/* Exit paths of the dispatcher. r3 carries the return value.
   run_innerloop_exit_REALLY writes r29 back to VG_(dispatch_ctr)
   before the saved registers are restored. */
369 /* Someone messed with the gsp (in r31). Have to
370 defer to scheduler to resolve this. dispatch ctr
371 is not yet decremented, so no need to increment. */
372 /* %CIA is NOT up to date here. First, need to write
373 %r3 back to %CIA, but without trashing %r31 since
374 that holds the value we want to return to the scheduler.
375 Hence use %r5 transiently for the guest state pointer. */
376 ld 5,256(1) /* original guest_state ptr */
377 std 3,OFFSET_ppc64_CIA(5)
378 mr 3,31 /* r3 = new gsp value */
383 /* %CIA is up to date */
384 /* back out decrement of the dispatch counter */
386 li 3,VG_TRC_INNER_COUNTERZERO
390 /* %CIA is up to date */
391 /* back out decrement of the dispatch counter */
393 li 3,VG_TRC_INNER_FASTMISS
398 /* All exits from the dispatcher go through here.
399 r3 holds the return value.
402 /* We're leaving. Check that nobody messed with
405 /* Set fpscr back to a known state, since vex-generated code
406 may have messed with fpscr[rm]. */
408 std 5,128(1) /* r1[128] is scratch */
410 mtfsf 0xFF,3 /* fpscr = f3 */
412 /* Using r11 - value used again further on, so don't trash! */
413 ld 11,tocent__vgPlain_machine_ppc64_has_VMX(2)
418 // Sigh. AIX 5.2 has no idea that Altivec exists.
419 // /* Check VSCR[NJ] == 1 */
420 // /* first generate 4x 0x00010000 */
421 // vspltisw 4,0x1 /* 4x 0x00000001 */
422 // vspltisw 5,0x0 /* zero */
423 // vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */
424 // /* retrieve VSCR and mask wanted bits */
426 // vand 7,7,6 /* gives NJ flag */
427 // vspltw 7,7,0x3 /* flags-word to all lanes */
428 // vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */
429 // bt 24,invariant_violation /* branch if all_equal */
432 /* otherwise we're OK */
433 b run_innerloop_exit_REALLY
/* Invariant failure: replace the return value with the failure code. */
437 li 3,VG_TRC_INVARIANT_FAILED
438 b run_innerloop_exit_REALLY
440 run_innerloop_exit_REALLY:
441 /* r3 holds VG_TRC_* value to return */
443 /* Write ctr to VG_(dispatch_ctr) */
444 ld 5,tocent__vgPlain_dispatch_ctr(2)
445 stw 29,0(5) /* yes, really stw */
447 /* Restore callee-saved registers... */
449 /* Floating-point regs */
490 /* r11 already holds VG_(machine_ppc64_has_VMX) value */
494 // Sigh. AIX 5.2 has no idea that Altivec exists.
497 // mtspr 4,256 /* VRSAVE reg is spr number 256 */
526 /* r3 is live here; don't trash it */
527 /* restore lr,cr,sp */
528 addi 4,1,2048 /* r4 = old SP */
/* NOTE(review): presumably the AIX traceback table for
   .vgPlain_run_innerloop -- confirm. The .long below is the distance
   from the code entry point to this label. */
536 LT..vgPlain_run_innerloop:
538 .byte 0,0,32,64,0,0,1,0
540 .long LT..vgPlain_run_innerloop-.vgPlain_run_innerloop
542 .byte "vgPlain_run_innerloop"
546 .llong _section_.text
548 /*------------------------------------------------------------*/
550 /*--- A special dispatcher, for running no-redir ---*/
551 /*--- translations. Just runs the given translation once. ---*/
553 /*------------------------------------------------------------*/
556 void VG_(run_a_noredir_translation) ( UWord* argblock );
559 /* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args
560 and 2 to carry results:
561 0: input: ptr to translation
562 1: input: ptr to guest state
563 2: output: next guest PC
564 3: output: guest state pointer afterwards (== thread return code)
568 .globl .VG_(run_a_noredir_translation)
569 .VG_(run_a_noredir_translation):
570 /* Rather than attempt to make sense of the AIX ABI, just
571 drop r1 by 512 (to get away from the caller's frame), then
572 1024 (to give ourselves a 1024-byte save area), and then
573 another 512 (to clear our save area). In all, drop r1 by 2048
574 and dump stuff on the stack at 512(1)..1536(1). */
575 /* At entry, r3 points to argblock */
577 /* ----- entry point to VG_(run_a_noredir_translation) ----- */
578 /* For AIX/ppc64 we do: LR-> +16(parent_sp), CR-> +8(parent_sp) */
586 /* New stack frame */
587 stdu 1,-2048(1) /* sp should maintain 16-byte alignment */
589 /* General reg save area : 160 bytes at r1[512 .. 671] */
609 std 3,512(1) /* will need it later */
/* r31 = guest state pointer, r30 = address of the translation to run */
611 ld 31,8(3) /* rd argblock[1] */
612 ld 30,0(3) /* rd argblock[0] */
613 mtlr 30 /* run translation */
/* Reload the argblock pointer saved above, then record the next guest
   PC (r3) and the resulting guest state pointer (r31). */
616 ld 4,512(1) /* &argblock */
617 std 3, 16(4) /* wr argblock[2] */
618 std 31,24(4) /* wr argblock[3] */
641 /* restore lr,cr,sp */
642 addi 4,1,2048 /* r4 = old SP */
650 #endif // defined(VGP_ppc64_aix5)
652 /*--------------------------------------------------------------------*/
654 /*--------------------------------------------------------------------*/