2 /*--------------------------------------------------------------------*/
3 /*--- The core dispatch loop, for jumping to a code address. ---*/
4 /*--- dispatch-ppc32-linux.S ---*/
5 /*--------------------------------------------------------------------*/
8 /* This file is part of Valgrind, a dynamic binary instrumentation framework.
11 Copyright (C) 2005-2010 Cerion Armour-Brown <cerion@open-works.co.uk>
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
28 The GNU General Public License is contained in the file COPYING. */
31 #if defined(VGP_ppc32_linux)
34 #include "pub_core_basics_asm.h"
35 #include "pub_core_dispatch_asm.h"
36 #include "pub_core_transtab_asm.h"
37 #include "libvex_guest_offsets.h" /* for OFFSET_ppc32_CIA */
40 /*------------------------------------------------------------*/
42 /*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
43 /*--- run all translations except no-redir ones. ---*/
45 /*------------------------------------------------------------*/
47 /*----------------------------------------------------*/
48 /*--- Preamble (set everything up) ---*/
49 /*----------------------------------------------------*/
52 /* signature: UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling ); */
/* VG_(run_innerloop): entry and preamble.
   In:  r3 = guest_state pointer, r4 = do_profiling flag.
   Visible effects in this excerpt:
     - allocates a 496-byte frame with stwu (496 = 31 * 16, so a
       16-byte aligned sp stays 16-byte aligned),
     - copies the guest state pointer to r31 and spills it to 28(sp),
     - loads VG_(dispatch_ctr) into r29,
     - loads the guest CIA into r3 and branches to one of the two
       dispatcher loops.
   NOTE(review): many instructions that the comments below describe
   (register saves, the conditional branches that test the has_FP and
   has_VMX flags, the frame push before calling generated code) are not
   visible in this excerpt, and some block comments lack their closing
   delimiter. Confirm every detail against the complete file. */
55 .globl VG_(run_innerloop)
56 .type VG_(run_innerloop), @function
58 /* r3 holds guest_state */
59 /* r4 holds do_profiling */
61 /* ----- entry point to VG_(run_innerloop) ----- */
62 /* For Linux/ppc32 we need the SysV ABI, which uses
63 LR->4(parent_sp), CR->anywhere.
64 (The AIX ABI, used on Darwin,
65 uses LR->8(prt_sp), CR->4(prt_sp))
73 stwu 1,-496(1) /* sp should maintain 16-byte alignment */
75 /* Save callee-saved registers... */
76 /* r3, r4 are live here, so use r5 */
77 lis 5,VG_(machine_ppc32_has_FP)@ha
78 lwz 5,VG_(machine_ppc32_has_FP)@l(5)
/* r5 = VG_(machine_ppc32_has_FP). Presumably tested next so that the
   FP register saves are skipped on hosts without FP - the test is not
   visible in this excerpt, TODO confirm. */
82 /* Floating-point reg save area : 144 bytes */
103 /* General reg save area : 72 bytes */
122 /* Probably not necessary to save r13 (thread-specific ptr),
123 as VEX stays clear of it... but what the hey. */
126 /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
127 The Linux kernel might not actually use VRSAVE for its intended
128 purpose, but it should be harmless to preserve anyway. */
129 /* r3, r4 are live here, so use r5 */
130 lis 5,VG_(machine_ppc32_has_VMX)@ha
131 lwz 5,VG_(machine_ppc32_has_VMX)@l(5)
136 /* VRSAVE save word : 32 bytes */
137 mfspr 5,256 /* vrsave reg is spr number 256 */
140 /* Alignment padding : 4 bytes */
142 /* Vector reg save area (quadword aligned) : 192 bytes */
175 /* Local variable space... */
177 /* 32(sp) used later to check FPSCR[RM] */
179 /* r3 holds guest_state */
180 /* r4 holds do_profiling */
181 mr 31,3 /* r31 (generated code gsp) = r3 */
182 stw 3,28(1) /* spill orig guest_state ptr */
/* NOTE(review): the dispatcher loops reload this pointer from 44(r1),
   not 28(r1). That is consistent only if the frame made for the called
   code (see the comment further down) is 16 bytes - that instruction
   is not visible here, TODO confirm. */
184 /* 24(sp) used later to stop ctr reg being clobbered */
185 /* 20(sp) used later to load fpscr with zero */
188 /* Linkage Area (reserved)
193 /* CAB TODO: Use a caller-saved reg for orig guest_state ptr
194 - remember to set it non-allocatable in isel.c */
196 /* hold dispatch_ctr in r29 */
197 lis 5,VG_(dispatch_ctr)@ha
198 lwz 29,VG_(dispatch_ctr)@l(5)
200 /* set host FPU control word to the default mode expected
201 by VEX-generated code. See comments in libvex.h for
203 lis 5,VG_(machine_ppc32_has_FP)@ha
204 lwz 5,VG_(machine_ppc32_has_FP)@l(5)
208 /* get zero into f3 (tedious) */
209 /* note: fsub 3,3,3 is not a reliable way to do this,
210 since if f3 holds a NaN or similar then we don't necessarily
211 wind up with zero. */
215 mtfsf 0xFF,3 /* fpscr = f3 */
218 /* set host AltiVec control word to the default mode expected
219 by VEX-generated code. */
220 lis 5,VG_(machine_ppc32_has_VMX)@ha
221 lwz 5,VG_(machine_ppc32_has_VMX)@l(5)
226 vspltisw 3,0x0 /* generate zero */
232 /* make a stack frame for the code we are calling */
235 /* fetch %CIA into r3 */
236 lwz 3,OFFSET_ppc32_CIA(31)
238 /* fall into main loop (the right one) */
239 /* r4 = do_profiling. It's probably trashed after here,
240 but that's OK: we don't need it after here. */
/* NOTE(review): the compare that sets the condition tested by this
   beq is not visible in this excerpt. Presumably r4 (do_profiling) is
   compared with zero, so zero selects the unprofiled loop - confirm. */
242 beq VG_(run_innerloop__dispatch_unprofiled)
243 b VG_(run_innerloop__dispatch_profiled)
246 /*----------------------------------------------------*/
247 /*--- NO-PROFILING (standard) dispatcher ---*/
248 /*----------------------------------------------------*/
/* VG_(run_innerloop__dispatch_unprofiled): dispatcher loop without
   profiling. Each iteration, as far as this excerpt shows:
     1. reloads the original guest state pointer from 44(r1) into r9,
     2. stores the next guest address (r3) to the guest CIA slot,
     3. indexes VG_(tt_fast) by a hash of r3 and loads the .guest and
        .host words of that entry into r6 and r7,
     4. leaves via fast_lookup_failed on a miss, otherwise runs the
        translation and branches back to the top of this loop.
   NOTE(review): the compares feeding the conditional branches, the
   dispatch counter decrement and the actual call through .host are not
   visible in this excerpt. Confirm against the complete file. */
250 .global VG_(run_innerloop__dispatch_unprofiled)
251 VG_(run_innerloop__dispatch_unprofiled):
252 /* At entry: Live regs:
254 r3 (=CIA = next guest address)
258 44(r1) (=orig guest_state)
260 /* Has the guest state pointer been messed with? If yes, exit.
261 Also set up & VG_(tt_fast) early in an attempt at better
263 lwz 9,44(1) /* original guest_state ptr */
264 lis 5,VG_(tt_fast)@ha
265 addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */
/* r9 = original guest_state, r5 = address of VG_(tt_fast).
   Presumably r31 is compared with r9 here and a mismatch exits
   through the gsp-changed path - not visible, TODO confirm. */
269 /* save the jump address in the guest state */
270 stw 3,OFFSET_ppc32_CIA(31)
272 /* Are we out of timeslice? If yes, defer to scheduler. */
277 /* try a fast lookup in the translation cache */
278 /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
279 = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
/* Rotating r3 left by 1 and keeping bits 29-VG_TT_FAST_BITS..28
   (IBM bit numbering, bit 31 = least significant) performs the shift
   right by 2, the mask and the shift left by 3 in one instruction.
   The three low bits of r4 end up zero, so r4 is a byte offset to an
   8-byte entry. */
280 rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */
281 add 5,5,4 /* & VG_(tt_fast)[entry#] */
282 lwz 6,0(5) /* .guest */
283 lwz 7,4(5) /* .host */
/* NOTE(review): the compare that feeds this bne is not visible.
   Presumably r3 is compared with .guest (r6) - confirm. */
285 bne fast_lookup_failed
287 /* Found a match. Call .host. */
/* NOTE(review): the call itself is not visible in this excerpt.
   r7 holds the .host address loaded from the entry. */
291 /* On return from guest code:
292 r3 holds destination (original) address.
293 r31 may be unchanged (guest_state), or may indicate further
294 details of the control transfer requested to *r3.
297 b VG_(run_innerloop__dispatch_unprofiled)
300 /*----------------------------------------------------*/
301 /*--- PROFILING dispatcher (can be much slower) ---*/
302 /*----------------------------------------------------*/
/* VG_(run_innerloop__dispatch_profiled): dispatcher loop with basic
   block profiling. It mirrors the unprofiled loop (reload original
   guest state pointer, store r3 to the guest CIA slot, hash r3 into
   VG_(tt_fast), load .guest and .host) and, on a hit, additionally
   looks up the matching slot of VG_(tt_fastN) before running the
   translation.
   NOTE(review): the compares feeding the conditional branches, the
   dispatch counter decrement, the counter update and the actual call
   through .host are not visible in this excerpt. Confirm against the
   complete file. */
304 .global VG_(run_innerloop__dispatch_profiled)
305 VG_(run_innerloop__dispatch_profiled):
306 /* At entry: Live regs:
308 r3 (=CIA = next guest address)
312 44(r1) (=orig guest_state)
314 /* Has the guest state pointer been messed with? If yes, exit.
315 Also set up & VG_(tt_fast) early in an attempt at better
317 lwz 9,44(1) /* original guest_state ptr */
318 lis 5,VG_(tt_fast)@ha
319 addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */
/* r9 = original guest_state, r5 = address of VG_(tt_fast).
   Presumably r31 is compared with r9 here and a mismatch exits
   through the gsp-changed path - not visible, TODO confirm. */
323 /* save the jump address in the guest state */
324 stw 3,OFFSET_ppc32_CIA(31)
326 /* Are we out of timeslice? If yes, defer to scheduler. */
331 /* try a fast lookup in the translation cache */
332 /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
333 = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
/* Rotating r3 left by 1 and keeping bits 29-VG_TT_FAST_BITS..28
   (IBM bit numbering, bit 31 = least significant) performs the shift
   right by 2, the mask and the shift left by 3 in one instruction.
   The three low bits of r4 end up zero, so r4 is a byte offset to an
   8-byte entry. */
334 rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */
335 add 5,5,4 /* & VG_(tt_fast)[entry#] */
336 lwz 6,0(5) /* .guest */
337 lwz 7,4(5) /* .host */
/* NOTE(review): the compare that feeds this bne is not visible.
   Presumably r3 is compared with .guest (r6) - confirm. */
339 bne fast_lookup_failed
341 /* increment bb profile counter */
342 srwi 4,4,1 /* entry# * sizeof(UInt*) */
343 addis 6,4,VG_(tt_fastN)@ha
344 lwz 9,VG_(tt_fastN)@l(6)
/* r4 was entry * 8, halving it gives entry * 4, the byte offset of
   that entry in VG_(tt_fastN). r9 now holds the word loaded from the
   slot; the sizeof(UInt*) remark suggests it is a pointer to a UInt
   counter.
   NOTE(review): the load, increment and store of the counter itself
   are not visible in this excerpt - confirm. */
349 /* Found a match. Call .host. */
/* NOTE(review): the call itself is not visible in this excerpt.
   r7 holds the .host address loaded from the entry. */
353 /* On return from guest code:
354 r3 holds destination (original) address.
355 r31 may be unchanged (guest_state), or may indicate further
356 details of the control transfer requested to *r3.
359 b VG_(run_innerloop__dispatch_profiled)
362 /*----------------------------------------------------*/
363 /*--- exit points ---*/
364 /*----------------------------------------------------*/
/* Exit paths of VG_(run_innerloop). Each path puts the value to return
   in r3; the comments below say all of them pass through common exit
   code that checks host FP and AltiVec control state before reaching
   run_innerloop_exit_REALLY:
     - guest state pointer changed:   r3 = r31 (the new gsp value),
     - dispatch counter reached zero: r3 = VG_TRC_INNER_COUNTERZERO,
     - fast cache miss:               r3 = VG_TRC_INNER_FASTMISS,
     - invariant check failed:        r3 = VG_TRC_INVARIANT_FAILED.
   NOTE(review): the labels of these paths, the branches joining them
   and the instructions that back out the counter decrement are not
   visible in this excerpt. Confirm against the complete file. */
367 /* Someone messed with the gsp (in r31). Have to
368 defer to scheduler to resolve this. dispatch ctr
369 is not yet decremented, so no need to increment. */
370 /* %CIA is NOT up to date here. First, need to write
371 %r3 back to %CIA, but without trashing %r31 since
372 that holds the value we want to return to the scheduler.
373 Hence use %r5 transiently for the guest state pointer. */
374 lwz 5,44(1) /* original guest_state ptr */
375 stw 3,OFFSET_ppc32_CIA(5)
376 mr 3,31 /* r3 = new gsp value */
381 /* %CIA is up to date */
382 /* back out decrement of the dispatch counter */
384 li 3,VG_TRC_INNER_COUNTERZERO
388 /* %CIA is up to date */
389 /* back out decrement of the dispatch counter */
391 li 3,VG_TRC_INNER_FASTMISS
396 /* All exits from the dispatcher go through here.
397 r3 holds the return value.
400 /* We're leaving. Check that nobody messed with
403 /* Using r10 - value used again further on, so don't trash! */
404 lis 10,VG_(machine_ppc32_has_FP)@ha
405 lwz 10,VG_(machine_ppc32_has_FP)@l(10)
409 /* Set fpscr back to a known state, since vex-generated code
410 may have messed with fpscr[rm]. */
416 mtfsf 0xFF,3 /* fpscr = f3 */
419 /* Using r11 - value used again further on, so don't trash! */
420 lis 11,VG_(machine_ppc32_has_VMX)@ha
421 lwz 11,VG_(machine_ppc32_has_VMX)@l(11)
426 /* Check VSCR[NJ] == 1 */
427 /* first generate 4x 0x00010000 */
428 vspltisw 4,0x1 /* 4x 0x00000001 */
429 vspltisw 5,0x0 /* zero */
430 vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */
431 /* retrieve VSCR and mask wanted bits */
433 vand 7,7,6 /* gives NJ flag */
434 vspltw 7,7,0x3 /* flags-word to all lanes */
/* NOTE(review): v6 holds the NJ mask in every lane and v7 the masked
   VSCR word, so the branch below reports a violation when NJ is set.
   In other words the heading asks whether NJ == 1, and NJ == 1 is the
   failure case, not the expected state. The entry code generates zero
   for the AltiVec control word, which matches - confirm. */
435 vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */
436 bt 24,invariant_violation /* branch if all_equal */
440 /* otherwise we're OK */
441 b run_innerloop_exit_REALLY
445 li 3,VG_TRC_INVARIANT_FAILED
446 b run_innerloop_exit_REALLY
/* run_innerloop_exit_REALLY: final exit sequence of VG_(run_innerloop).
   In:  r3  = VG_TRC_* value to return,
        r29 = current dispatch counter (written back below),
        r10 = VG_(machine_ppc32_has_FP) value,
        r11 = VG_(machine_ppc32_has_VMX) value.
   Writes r29 to VG_(dispatch_ctr), restores the callee-saved state
   stored by the preamble, reloads the return address from the LR save
   word of the parent frame and pops the 496-byte frame.
   NOTE(review): most of the restore instructions, the tests of r10
   and r11, and the final move to LR and return are not visible in this
   excerpt. Confirm against the complete file. */
448 run_innerloop_exit_REALLY:
449 /* r3 holds VG_TRC_* value to return */
451 /* Return to parent stack */
454 /* Write ctr to VG_(dispatch_ctr) */
455 lis 5,VG_(dispatch_ctr)@ha
456 stw 29,VG_(dispatch_ctr)@l(5)
462 /* Restore callee-saved registers... */
464 /* r10 already holds VG_(machine_ppc32_has_FP) value */
468 /* Floating-point regs */
510 /* r11 already holds VG_(machine_ppc32_has_VMX) value */
/* Restore VRSAVE. This is the restore path, so the register has to be
   written (mtspr); a read (mfspr) would only overwrite r4 and leave
   VRSAVE holding whatever the generated code left there, so the value
   saved on entry would never be put back.
   NOTE(review): assumes the instruction just before this one (not
   visible in this excerpt) loads the saved VRSAVE word from the frame
   into r4 - confirm. */
517 mtspr 256,4 /* VRSAVE reg is spr number 256 */
/* 500 = 496 + 4: under the SysV ABI the return address lives at
   4(parent_sp), and parent_sp is sp + 496 while the frame is live. */
548 lwz 0,500(1) /* stack_size + 4 */
550 addi 1,1,496 /* stack_size */
552 .size VG_(run_innerloop), .-VG_(run_innerloop)
555 /*------------------------------------------------------------*/
557 /*--- A special dispatcher, for running no-redir ---*/
558 /*--- translations. Just runs the given translation once. ---*/
560 /*------------------------------------------------------------*/
563 /* signature: void VG_(run_a_noredir_translation) ( UWord* argblock ); */
566 /* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args
567 and 2 to carry results:
568 0: input: ptr to translation
569 1: input: ptr to guest state
570 2: output: next guest PC
571 3: output: guest state pointer afterwards (== thread return code) */
573 .global VG_(run_a_noredir_translation)
574 .type VG_(run_a_noredir_translation), @function
575 VG_(run_a_noredir_translation):
576 /* save callee-save int regs, & lr */
/* NOTE(review): the body of this routine is not visible in this
   excerpt. Going by the argblock description before the label, it is
   expected to read words 0 and 1 (translation and guest state
   pointers) and to write words 2 and 3 (next guest PC and guest state
   pointer afterwards) - confirm against the complete file. */
631 .size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)
634 /* Let the linker know we don't need an executable stack */
635 .section .note.GNU-stack,"",@progbits
637 #endif // defined(VGP_ppc32_linux)
639 /*--------------------------------------------------------------------*/
641 /*--------------------------------------------------------------------*/