1 (***********************************************************************)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7 (* Copyright 1996 Institut National de Recherche en Informatique et *)
8 (* en Automatique. All rights reserved. This file is distributed *)
9 (* under the terms of the Q Public License version 1.0. *)
11 (***********************************************************************)
13 (* $Id: emit.mlp 9475 2009-12-16 10:04:38Z xleroy $ *)
15 (* Emission of HP PA-RISC assembly code *)
17 (* Must come before open Reg... *)
34 (* Tradeoff between code size and code speed *)
36 let fastcode_flag = ref true
38 (* Layout of the stack *)
39 (* Always keep the stack 8-aligned. *)
41 let stack_offset = ref 0
46 4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) +
47 (if !contains_calls then 4 else 0) in
50 let slot_offset loc cl =
52 Incoming n -> -frame_size() - n
55 then - !stack_offset - num_stack_slots.(1) * 8 - n * 4 - 4
56 else - !stack_offset - n * 8 - 8
62 emit_string "L$"; emit_int lbl
67 Emitaux.emit_symbol '$' s
69 (* Output a pseudo-register *)
73 Reg r -> emit_string (register_name r)
76 (* Output low address / high address prefixes *)
78 let low_prefix = "RR%"
79 let high_prefix = "LR%"
81 let is_immediate n = (n < 16) && (n >= -16) (* 5 bits *)
83 let emit_int_low n = emit_string low_prefix; emit_int n
84 let emit_int_high n = emit_string high_prefix; emit_int n
86 let emit_nativeint_low n = emit_string low_prefix; emit_nativeint n
87 let emit_nativeint_high n = emit_string high_prefix; emit_nativeint n
89 let emit_symbol_low s =
90 `RR%{emit_symbol s}-$global$`
92 let load_symbol_high s =
93 ` addil LR%{emit_symbol s}-$global$, %r27\n`
95 let load_symbol_offset_high s ofs =
96 ` addil LR%{emit_symbol s}-$global$+{emit_int ofs}, %r27\n`
98 (* Record imported and defined symbols *)
100 let used_symbols = ref StringSet.empty
101 let defined_symbols = ref StringSet.empty
102 let called_symbols = ref StringSet.empty
105 used_symbols := StringSet.add s !used_symbols
106 let define_symbol s =
107 defined_symbols := StringSet.add s !defined_symbols
109 used_symbols := StringSet.add s !used_symbols;
110 called_symbols := StringSet.add s !called_symbols
112 (* An external symbol is code if either it is branched to, or
113 it is one of the caml_apply* caml_curry* caml_tuplify* special functions. *)
115 let code_imports = ["caml_apply"; "caml_curry"; "caml_tuplify"]
117 let match_prefix s pref =
118 String.length s >= String.length pref
119 && String.sub s 0 (String.length pref) = pref
122 if not(StringSet.mem s !defined_symbols) then begin
123 ` .import {emit_symbol s}`;
124 if StringSet.mem s !called_symbols
125 || List.exists (match_prefix s) code_imports
130 let emit_imports () =
131 StringSet.iter emit_import !used_symbols;
132 used_symbols := StringSet.empty;
133 defined_symbols := StringSet.empty;
134 called_symbols := StringSet.empty
136 (* Output an integer load / store *)
138 let is_offset n = (n < 8192) && (n >= -8192) (* 14 bits *)
140 let is_offset_native n =
141 n < Nativeint.of_int 8192 && n >= Nativeint.of_int (-8192)
143 let emit_load instr addr arg dst =
148 ` {emit_string instr} {emit_symbol_low s}(%r1), {emit_reg dst}\n`
151 load_symbol_offset_high s ofs;
152 ` {emit_string instr} {emit_symbol_low s}+{emit_int ofs}(%r1), {emit_reg dst}\n`
154 if is_offset ofs then
155 ` {emit_string instr} {emit_int ofs}({emit_reg arg.(0)}), {emit_reg dst}\n`
157 ` addil {emit_int_high ofs}, {emit_reg arg.(0)}\n`;
158 ` {emit_string instr} {emit_int_low ofs}(%r1), {emit_reg dst}\n`
161 let emit_store instr addr arg src =
166 ` {emit_string instr} {emit_reg src}, {emit_symbol_low s}(%r1)\n`
169 load_symbol_offset_high s ofs;
170 ` {emit_string instr} {emit_reg src}, {emit_symbol_low s}+{emit_int ofs}(%r1)\n`
172 if is_offset ofs then
173 ` {emit_string instr} {emit_reg src}, {emit_int ofs}({emit_reg arg.(1)})\n`
175 ` addil {emit_int_high ofs}, {emit_reg arg.(0)}\n`;
176 ` {emit_string instr} {emit_reg src}, {emit_int_low ofs}(%r1)\n`
179 (* Output a floating-point load / store *)
181 let emit_float_load addr arg dst doubleword =
186 ` ldo {emit_symbol_low s}(%r1), %r1\n`;
187 ` fldws 0(%r1), {emit_reg dst}L\n`;
189 ` fldws 4(%r1), {emit_reg dst}R\n`
192 load_symbol_offset_high s ofs;
193 ` ldo {emit_symbol_low s}+{emit_int ofs}(%r1), %r1\n`;
194 ` fldws 0(%r1), {emit_reg dst}L\n`;
196 ` fldws 4(%r1), {emit_reg dst}R\n`
198 if is_immediate ofs && (is_immediate (ofs+4) || not doubleword)
200 ` fldws {emit_int ofs}({emit_reg arg.(0)}), {emit_reg dst}L\n`;
202 ` fldws {emit_int (ofs+4)}({emit_reg arg.(0)}), {emit_reg dst}R\n`
204 if is_offset ofs then
205 ` ldo {emit_int ofs}({emit_reg arg.(0)}), %r1\n`
207 ` addil {emit_int_high ofs}, {emit_reg arg.(0)}\n`;
208 ` ldo {emit_int_low ofs}(%r1), %r1\n`
210 ` fldws 0(%r1), {emit_reg dst}L\n`;
212 ` fldws 4(%r1), {emit_reg dst}R\n`
215 let emit_float_store addr arg src doubleword =
220 ` ldo {emit_symbol_low s}(%r1), %r1\n`;
221 ` fstws {emit_reg src}L, 0(%r1)\n`;
223 ` fstws {emit_reg src}R, 4(%r1)\n`
226 load_symbol_offset_high s ofs;
227 ` ldo {emit_symbol_low s}+{emit_int ofs}(%r1), %r1\n`;
228 ` fstws {emit_reg src}L, 0(%r1)\n`;
230 ` fstws {emit_reg src}R, 4(%r1)\n`
232 if is_immediate ofs && (is_immediate (ofs+4) || not doubleword)
234 ` fstws {emit_reg src}L, {emit_int ofs}({emit_reg arg.(1)})\n`;
236 ` fstws {emit_reg src}R, {emit_int(ofs+4)}({emit_reg arg.(1)})\n`
238 if is_offset ofs then
239 ` ldo {emit_int ofs}({emit_reg arg.(1)}), %r1\n`
241 ` addil {emit_int_high ofs}, {emit_reg arg.(1)}\n`;
242 ` ldo {emit_int_low ofs}(%r1), %r1\n`
244 ` fstws {emit_reg src}L, 0(%r1)\n`;
246 ` fstws {emit_reg src}R, 4(%r1)\n`
249 (* Output an align directive. *)
252 ` .align {emit_int n}\n`
254 (* Record live pointers at call points *)
257 { fd_lbl: int; (* Return address *)
258 fd_frame_size: int; (* Size of stack frame *)
259 fd_live_offset: int list } (* Offsets/regs of live addresses *)
261 let frame_descriptors = ref([] : frame_descr list)
263 let record_frame live =
264 let lbl = new_label() in
265 let live_offset = ref [] in
268 {typ = Addr; loc = Reg r} ->
269 live_offset := ((r lsl 1) + 1) :: !live_offset
270 | {typ = Addr; loc = Stack s} as reg ->
271 live_offset := slot_offset s (register_class reg) :: !live_offset
276 fd_frame_size = frame_size();
277 fd_live_offset = !live_offset } :: !frame_descriptors;
278 `{emit_label lbl}:\n`
281 ` .long {emit_label fd.fd_lbl} + 3\n`;
282 ` .short {emit_int fd.fd_frame_size}\n`;
283 ` .short {emit_int (List.length fd.fd_live_offset)}\n`;
286 ` .short {emit_int n}\n`)
290 (* Record floating-point constants *)
292 let float_constants = ref ([] : (int * string) list)
294 let emit_float_constants () =
295 if Config.system = "hpux" then begin
304 emit_float64_split_directive ".long" cst)
306 float_constants := []
308 (* Describe the registers used to pass arguments to a C function *)
310 let describe_call arg =
313 for i = 0 to Array.length arg - 1 do
314 if !pos < 4 then begin
315 match arg.(i).typ with
316 Float -> `, ARGW{emit_int !pos}=FR, ARGW{emit_int(!pos + 1)}=FU`;
318 | _ -> `, ARGW{emit_int !pos}=GR`;
324 (* Output a function call *)
326 let emit_call s retreg =
328 ` bl {emit_symbol s}, {emit_string retreg}\n`
330 (* Names of various instructions *)
332 let name_for_int_operation = function
340 let name_for_float_operation = function
342 | Isubf -> "fsub,dbl"
343 | Imulf -> "fmpy,dbl"
344 | Idivf -> "fdiv,dbl"
347 let name_for_specific_operation = function
348 Ishift1add -> "sh1add"
349 | Ishift2add -> "sh2add"
350 | Ishift3add -> "sh3add"
352 let name_for_int_comparison = function
353 Isigned Ceq -> "=" | Isigned Cne -> "<>"
354 | Isigned Cle -> "<=" | Isigned Cgt -> ">"
355 | Isigned Clt -> "<" | Isigned Cge -> ">="
356 | Iunsigned Ceq -> "=" | Iunsigned Cne -> "<>"
357 | Iunsigned Cle -> "<<=" | Iunsigned Cgt -> ">>"
358 | Iunsigned Clt -> "<<" | Iunsigned Cge -> ">>="
360 let name_for_float_comparison cmp neg =
362 Ceq -> if neg then "=" else "!="
363 | Cne -> if neg then "!=" else "="
364 | Cle -> if neg then "<=" else "!<="
365 | Cgt -> if neg then ">" else "!>"
366 | Clt -> if neg then "<" else "!<"
367 | Cge -> if neg then ">=" else "!>="
369 let negate_int_comparison = function
370 Isigned cmp -> Isigned(Cmm.negate_comparison cmp)
371 | Iunsigned cmp -> Iunsigned(Cmm.negate_comparison cmp)
373 let swap_int_comparison = function
374 Isigned cmp -> Isigned(Cmm.swap_comparison cmp)
375 | Iunsigned cmp -> Iunsigned(Cmm.swap_comparison cmp)
378 (* Output the assembly code for an instruction *)
380 (* Name of current function *)
381 let function_name = ref ""
382 (* Entry point for tail recursive calls *)
383 let tailrec_entry_point = ref 0
384 (* Label of trap for out-of-range accesses *)
385 let range_check_trap = ref 0
387 let rec emit_instr i dslot =
390 | Lop(Imove | Ispill | Ireload) ->
391 let src = i.arg.(0) and dst = i.res.(0) in
392 begin match (src, dst) with
393 {loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} ->
394 ` copy {emit_reg src}, {emit_reg dst}\n`
395 | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
396 ` fcpy,dbl {emit_reg src}, {emit_reg dst}\n`
397 | {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} ->
398 let ofs = slot_offset sd 0 in
399 ` stw {emit_reg src}, {emit_int ofs}(%r30)\n`
400 | {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
401 let ofs = slot_offset sd 1 in
402 if is_immediate ofs then
403 ` fstds {emit_reg src}, {emit_int ofs}(%r30)\n`
405 ` ldo {emit_int ofs}(%r30), %r1\n`;
406 ` fstds {emit_reg src}, 0(%r1)\n`
408 | {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} ->
409 let ofs = slot_offset ss 0 in
410 ` ldw {emit_int ofs}(%r30), {emit_reg dst}\n`
411 | {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
412 let ofs = slot_offset ss 1 in
413 if is_immediate ofs then
414 ` fldds {emit_int ofs}(%r30), {emit_reg dst}\n`
416 ` ldo {emit_int ofs}(%r30), %r1\n`;
417 ` fldds 0(%r1), {emit_reg dst}\n`
422 | Lop(Iconst_int n) ->
423 if is_offset_native n then
424 ` ldi {emit_nativeint n}, {emit_reg i.res.(0)}\n`
426 ` ldil {emit_nativeint_high n}, {emit_reg i.res.(0)}\n`;
427 ` ldo {emit_nativeint_low n}({emit_reg i.res.(0)}), {emit_reg i.res.(0)}\n`
429 | Lop(Iconst_float s) ->
430 let lbl = new_label() in
431 float_constants := (lbl, s) :: !float_constants;
432 ` ldil {emit_string high_prefix}{emit_label lbl}, %r1\n`;
433 ` ldo {emit_string low_prefix}{emit_label lbl}(%r1), %r1\n`;
434 ` fldds 0(%r1), {emit_reg i.res.(0)}\n`
435 | Lop(Iconst_symbol s) ->
438 ` ldo {emit_symbol_low s}(%r1), {emit_reg i.res.(0)}\n`
440 ` ble 0(4, {emit_reg i.arg.(0)})\n`; (* retaddr in %r31 *)
441 ` copy %r31, %r2\n`; (* in delay slot: save retaddr in %r2 *)
443 | Lop(Icall_imm s) ->
445 fill_delay_slot dslot;
447 | Lop(Itailcall_ind) ->
448 let n = frame_size() in
449 ` bv 0({emit_reg i.arg.(0)})\n`;
450 if !contains_calls (* in delay slot *)
451 then ` ldwm {emit_int(-n)}(%r30), %r2\n`
452 else ` ldo {emit_int(-n)}(%r30), %r30\n`
453 | Lop(Itailcall_imm s) ->
454 let n = frame_size() in
455 if s = !function_name then begin
456 ` b,n {emit_label !tailrec_entry_point}\n`
459 if !contains_calls (* in delay slot *)
460 then ` ldwm {emit_int(-n)}(%r30), %r2\n`
461 else ` ldo {emit_int(-n)}(%r30), %r30\n`
463 | Lop(Iextcall(s, alloc)) ->
466 ` ldil LR%{emit_symbol s}, %r22\n`;
468 emit_call "caml_c_call" "%r2";
469 ` ldo RR%{emit_symbol s}(%r22), %r22\n`; (* in delay slot *)
474 fill_delay_slot dslot
476 | Lop(Istackoffset n) ->
477 ` ldo {emit_int n}(%r30), %r30\n`;
478 stack_offset := !stack_offset + n
479 | Lop(Iload(chunk, addr)) ->
480 let dest = i.res.(0) in
481 begin match chunk with
483 emit_load "ldb" addr i.arg dest
485 emit_load "ldb" addr i.arg dest;
486 ` extrs {emit_reg dest}, 31, 8, {emit_reg dest}\n`
487 | Sixteen_unsigned ->
488 emit_load "ldh" addr i.arg dest
490 emit_load "ldh" addr i.arg dest;
491 ` extrs {emit_reg dest}, 31, 16, {emit_reg dest}\n`
493 emit_float_load addr i.arg dest false;
494 ` fcnvff,sgl,dbl {emit_reg dest}L, {emit_reg dest}\n`
495 | Double | Double_u ->
496 emit_float_load addr i.arg dest true
498 emit_load "ldw" addr i.arg dest
500 | Lop(Istore(chunk, addr)) ->
501 let src = i.arg.(0) in
502 begin match chunk with
503 Byte_unsigned | Byte_signed ->
504 emit_store "stb" addr i.arg src
505 | Sixteen_unsigned | Sixteen_signed ->
506 emit_store "sth" addr i.arg src
508 ` fcnvff,dbl,sgl {emit_reg src}, %fr31L\n`;
509 emit_float_store addr i.arg (phys_reg 127) (* %fr31 *) false
510 | Double | Double_u ->
511 emit_float_store addr i.arg src true
513 emit_store "stw" addr i.arg src
516 if !fastcode_flag then begin
517 let lbl_cont = new_label() in
518 ` ldw 0(%r4), %r1\n`;
519 ` ldo {emit_int (-n)}(%r3), %r3\n`;
520 ` comb,>>= %r3, %r1, {emit_label lbl_cont}\n`;
521 ` addi 4, %r3, {emit_reg i.res.(0)}\n`; (* in delay slot *)
522 emit_call "caml_call_gc" "%r2";
523 (* Cannot use %r1 to pass size, since clobbered by glue call code *)
524 ` ldi {emit_int n}, %r29\n`; (* in delay slot *)
526 ` addi 4, %r3, {emit_reg i.res.(0)}\n`;
527 `{emit_label lbl_cont}:\n`
529 emit_call "caml_allocN" "%r2";
530 (* Cannot use %r1 either *)
531 ` ldi {emit_int n}, %r29\n`; (* in delay slot *)
533 ` addi 4, %r3, {emit_reg i.res.(0)}\n`
535 | Lop(Iintop Imul) ->
536 ` stws,ma {emit_reg i.arg.(0)}, 8(%r30)\n`;
537 ` stw {emit_reg i.arg.(1)}, -4(%r30)\n`;
538 ` fldws -8(%r30), %fr31L\n`;
539 ` fldws -4(%r30), %fr31R\n`;
540 ` xmpyu %fr31L, %fr31R, %fr31\n`;
541 ` fstws %fr31R, -8(%r30)\n`; (* poor scheduling *)
542 ` ldws,mb -8(%r30), {emit_reg i.res.(0)}\n`
543 | Lop(Iintop Idiv) ->
544 (* Arguments are assumed to be in %r26 and %r25, result in %r29 *)
545 ` bl $$divI, %r31\n`;
546 fill_delay_slot dslot
547 | Lop(Iintop Imod) ->
548 (* Arguments are assumed to be in %r26 and %r25, result in %r29 *)
549 ` bl $$remI, %r31\n`;
550 fill_delay_slot dslot
551 | Lop(Iintop Ilsl) ->
552 ` subi 31, {emit_reg i.arg.(1)}, %r1\n`;
554 ` zvdep {emit_reg i.arg.(0)}, 32, {emit_reg i.res.(0)}\n`
555 | Lop(Iintop Ilsr) ->
556 ` mtsar {emit_reg i.arg.(1)}\n`;
557 ` vshd %r0, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
558 | Lop(Iintop Iasr) ->
559 ` subi 31, {emit_reg i.arg.(1)}, %r1\n`;
561 ` vextrs {emit_reg i.arg.(0)}, 32, {emit_reg i.res.(0)}\n`
562 | Lop(Iintop(Icomp cmp)) ->
563 let comp = name_for_int_comparison(negate_int_comparison cmp) in
564 ` comclr,{emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`;
565 ` ldi 1, {emit_reg i.res.(0)}\n`
566 | Lop(Iintop Icheckbound) ->
567 if !range_check_trap = 0 then range_check_trap := new_label();
568 ` comclr,>> {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, %r0\n`;
569 ` b,n {emit_label !range_check_trap}\n`
571 let instr = name_for_int_operation op in
572 ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
573 | Lop(Iintop_imm(Iadd, n)) ->
574 ` addi {emit_int n}, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
575 | Lop(Iintop_imm(Isub, n)) ->
576 ` addi {emit_int(-n)}, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
577 | Lop(Iintop_imm(Idiv, n)) ->
578 let l = Misc.log2 n in
579 ` comclr,>= {emit_reg i.arg.(0)}, %r0, %r1\n`;
581 ` zdepi -1, 31, {emit_int l}, %r1\n`
583 ` xor %r1, %r1, %r1\n`;
584 ` add {emit_reg i.arg.(0)}, %r1, %r1\n`;
585 ` extrs %r1, {emit_int(31-l)}, {emit_int(32-l)}, {emit_reg i.res.(0)}\n`
586 | Lop(Iintop_imm(Imod, n)) ->
587 let l = Misc.log2 n in
588 ` comclr,>= {emit_reg i.arg.(0)}, %r0, %r1\n`;
590 ` zdepi -1, 31, {emit_int l}, %r1\n`
592 ` xor %r1, %r1, %r1\n`;
593 ` add {emit_reg i.arg.(0)}, %r1, %r1\n`;
594 ` depi 0, 31, {emit_int l}, %r1\n`;
595 ` sub {emit_reg i.arg.(0)}, %r1, {emit_reg i.res.(0)}\n`
596 | Lop(Iintop_imm(Ilsl, n)) ->
598 ` zdep {emit_reg i.arg.(0)}, {emit_int(31-n)}, {emit_int(32-n)}, {emit_reg i.res.(0)}\n`
599 | Lop(Iintop_imm(Ilsr, n)) ->
601 ` extru {emit_reg i.arg.(0)}, {emit_int(31-n)}, {emit_int(32-n)}, {emit_reg i.res.(0)}\n`
602 | Lop(Iintop_imm(Iasr, n)) ->
604 ` extrs {emit_reg i.arg.(0)}, {emit_int(31-n)}, {emit_int(32-n)}, {emit_reg i.res.(0)}\n`
605 | Lop(Iintop_imm(Icomp cmp, n)) ->
606 let comp = name_for_int_comparison(negate_int_comparison(swap_int_comparison cmp)) in
607 ` comiclr,{emit_string comp} {emit_int n}, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`;
608 ` ldi 1, {emit_reg i.res.(0)}\n`
609 | Lop(Iintop_imm(Icheckbound, n)) ->
610 if !range_check_trap = 0 then range_check_trap := new_label();
611 ` comiclr,<< {emit_int n}, {emit_reg i.arg.(0)}, %r0\n`;
612 ` b,n {emit_label !range_check_trap}\n`
613 | Lop(Iintop_imm(op, n)) ->
615 | Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
616 let instr = name_for_float_operation op in
617 ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
619 ` fsub,dbl 0, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
621 ` fabs,dbl {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
622 | Lop(Ifloatofint) ->
623 ` stws,ma {emit_reg i.arg.(0)}, 8(%r30)\n`;
624 ` fldws,mb -8(%r30), %fr31L\n`;
625 ` fcnvxf,sgl,dbl %fr31L, {emit_reg i.res.(0)}\n`
626 | Lop(Iintoffloat) ->
627 ` fcnvfxt,dbl,sgl {emit_reg i.arg.(0)}, %fr31L\n`;
628 ` fstws,ma %fr31L, 8(%r30)\n`;
629 ` ldws,mb -8(%r30), {emit_reg i.res.(0)}\n`
630 | Lop(Ispecific sop) ->
631 let instr = name_for_specific_operation sop in
632 ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
634 let n = frame_size() in
635 ` ldw {emit_int(-n)}(%r30), %r2\n`
637 let n = frame_size() in
639 ` ldo {emit_int(-n)}(%r30), %r30\n` (* in delay slot *)
641 `{emit_label lbl}:\n`
643 begin match dslot with
645 ` b,n {emit_label lbl}\n`
647 ` b {emit_label lbl}\n`;
650 | Lcondbranch(tst, lbl) ->
653 emit_comib "<>" "=" 0 i.arg lbl dslot
655 emit_comib "=" "<>" 0 i.arg lbl dslot
657 let comp = name_for_int_comparison cmp
659 name_for_int_comparison(negate_int_comparison cmp) in
660 emit_comb comp negcomp i.arg lbl dslot
661 | Iinttest_imm(cmp, n) ->
662 let scmp = swap_int_comparison cmp in
663 let comp = name_for_int_comparison scmp
665 name_for_int_comparison(negate_int_comparison scmp) in
666 emit_comib comp negcomp n i.arg lbl dslot
667 | Ifloattest(cmp, neg) ->
668 let comp = name_for_float_comparison cmp neg in
669 ` fcmp,dbl,{emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
671 ` b {emit_label lbl}\n`;
672 fill_delay_slot dslot
674 emit_comib "OD" "EV" 0 i.arg lbl dslot
676 emit_comib "EV" "OD" 0 i.arg lbl dslot
678 | Lcondbranch3(lbl0, lbl1, lbl2) ->
679 begin match lbl0 with
681 | Some lbl -> emit_comib "=" "<>" 0 i.arg lbl None
683 begin match lbl1 with
685 | Some lbl -> emit_comib "=" "<>" 1 i.arg lbl None
687 begin match lbl2 with
689 | Some lbl -> emit_comib "=" "<>" 2 i.arg lbl None
692 ` blr {emit_reg i.arg.(0)}, 0\n`;
693 fill_delay_slot dslot;
694 for i = 0 to Array.length jumptbl - 1 do
695 ` b {emit_label jumptbl.(i)}\n`;
699 ` bl {emit_label lbl}, %r1\n`;
700 fill_delay_slot dslot
702 stack_offset := !stack_offset + 8;
703 ` stws,ma %r5, 8(%r30)\n`;
704 ` stw %r1, -4(%r30)\n`;
707 ` ldws,mb -8(%r30), %r5\n`;
708 stack_offset := !stack_offset - 8
710 ` ldw -4(%r5), %r1\n`;
713 ` ldws,mb -8(%r30), %r5\n` (* in delay slot *)
715 and fill_delay_slot = function
717 | Some i -> emit_instr i None
719 and emit_delay_slot = function
721 | Some i -> emit_instr i None
723 and emit_comb comp negcomp arg lbl dslot =
724 if lbl >= 0 then begin
725 ` comb,{emit_string comp} {emit_reg arg.(0)}, {emit_reg arg.(1)}, {emit_label lbl}\n`;
726 fill_delay_slot dslot
728 emit_delay_slot dslot;
729 ` comclr,{emit_string negcomp} {emit_reg arg.(0)}, {emit_reg arg.(1)}, %r0\n`;
730 ` b,n {emit_label (-lbl)}\n`
733 and emit_comib comp negcomp cst arg lbl dslot =
734 if lbl >= 0 then begin
735 ` comib,{emit_string comp} {emit_int cst}, {emit_reg arg.(0)}, {emit_label lbl}\n`;
736 fill_delay_slot dslot
738 emit_delay_slot dslot;
739 ` comiclr,{emit_string negcomp} {emit_int cst}, {emit_reg arg.(0)}, %r0\n`;
740 ` b,n {emit_label (-lbl)}\n`
743 (* Checks if a pseudo-instruction expands to exactly one machine instruction
744 that does not branch. *)
750 Imove | Ispill | Ireload ->
751 begin match (i.arg.(0), i.res.(0)) with
752 ({typ = Float; loc = Stack s}, _) -> is_immediate(slot_offset s 1)
753 | (_, {typ = Float; loc = Stack s}) -> is_immediate(slot_offset s 1)
756 | Iconst_int n -> is_offset_native n
757 | Istackoffset _ -> true
758 | Iload(_, Iindexed n) -> i.res.(0).typ <> Float & is_offset n
759 | Istore(_, Iindexed n) -> i.arg.(0).typ <> Float & is_offset n
760 | Iintop(Iadd | Isub | Iand | Ior | Ixor) -> true
761 | Iintop_imm((Iadd | Isub | Ilsl | Ilsr | Iasr), _) -> true
762 | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf -> true
763 | Ispecific _ -> true
766 | Lreloadretaddr -> true
769 let no_interference res arg =
771 for i = 0 to Array.length arg - 1 do
772 for j = 0 to Array.length res - 1 do
773 if arg.(i).loc = res.(j).loc then raise Exit
780 (* Emit a sequence of instructions, trying to fill delay slots for branches *)
785 | {next = {desc = Lop(Icall_imm _)
786 | Lop(Iextcall(_, false))
787 | Lop(Iintop(Idiv | Imod))
790 when is_one_instr i ->
791 emit_instr i.next (Some i);
793 | {next = {desc = Lcondbranch(_, _) | Lswitch _}}
794 when is_one_instr i & no_interference i.res i.next.arg ->
795 emit_instr i.next (Some i);
801 (* Estimate the size of an instruction, in actual HPPA instructions *)
803 let is_float_stack r =
804 match r with {loc = Stack _; typ = Float} -> true | _ -> false
811 Imove | Ispill | Ireload ->
812 if is_float_stack i.arg.(0) || is_float_stack i.res.(0)
813 then 2 (* ldo/fxxx *) else 1
815 if is_offset_native n then 1 else 2 (* ldi or ldil/ldo *)
816 | Iconst_float _ -> 3 (* ldil/ldo/fldds *)
817 | Iconst_symbol _ -> 2 (* addil/ldo *)
818 | Icall_ind -> 2 (* ble/copy *)
819 | Icall_imm _ -> 2 (* bl/nop *)
820 | Itailcall_ind -> 2 (* bv/ldwm *)
821 | Itailcall_imm _ -> 2 (* bl/ldwm *)
822 | Iextcall(_, alloc) ->
823 if alloc then 3 (* ldil/bl/ldo *) else 2 (* bl/nop *)
824 | Istackoffset _ -> 1 (* ldo *)
825 | Iload(chunk, addr) ->
826 if i.res.(0).typ = Float
827 then 4 (* addil/ldo/fldws/fldws *)
828 else (match addr with Iindexed ofs when is_offset ofs -> 1 | _ -> 2)
829 + (match chunk with Byte_signed -> 1 | Sixteen_signed -> 1 | _ -> 0)
830 | Istore(chunk, addr) ->
831 if i.arg.(0).typ = Float
832 then 4 (* addil/ldo/fstws/fstws *)
833 else (match addr with Iindexed ofs when is_offset ofs -> 1 | _ -> 2)
834 | Ialloc _ -> if !fastcode_flag then 7 else 3
836 | Iintop(Idiv | Imod) -> 3 (* ldil/ble/nop *)
837 | Iintop Ilsl -> 3 (* subi/mtsar/zvdep *)
838 | Iintop Ilsr -> 2 (* mtsar/vshd *)
839 | Iintop Iasr -> 3 (* subi/mtsar/vextrs *)
840 | Iintop(Icomp _) -> 2 (* comclr/ldi *)
841 | Iintop Icheckbound -> 2 (* comclr/b,n *)
843 | Iintop_imm(Idiv, _) -> 4 (* comclr/zdepi/add/extrs *)
844 | Iintop_imm(Imod, _) -> 5 (* comclr/zdepi/add/extrs/sub *)
845 | Iintop_imm(Icomp _, _) -> 2 (* comiclr/ldi *)
846 | Iintop_imm(Icheckbound, _) -> 2 (* comiclr/b,n *)
847 | Iintop_imm(_, _) -> 1
848 | Ifloatofint -> 3 (* stws,ma/fldws,mb/fcnvxf *)
849 | Iintoffloat -> 3 (* fcnfxt/fstws/ldws *)
850 | _ (* Inegf|Iabsf|Iaddf|Isubf|Imulf|Idivf|Ispecific _ *) -> 1
852 | Lreloadretaddr -> 1
855 | Lbranch _ -> 1 (* b,n *)
856 | Lcondbranch(Ifloattest(_, _), _) -> 4 (* fcmp/ftest/b/nop *)
857 | Lcondbranch(_, _) -> 2 (* comb/nop or comclr/b,n *)
858 | Lcondbranch3(_, _, _) -> 6 (* worst case: three comib/nop or comclr/b,n *)
859 | Lswitch tbl -> 2 + 2 * Array.length tbl (* blr/nop b/nop *)
860 | Lsetuptrap _ -> 2 (* bl/nop *)
861 | Lpushtrap -> 3 (* stws,ma/stw/copy *)
862 | Lpoptrap -> 1 (* ldws,mb *)
863 | Lraise -> 4 (* ldw/copy/bv/ldws,mb *)
865 (* Estimate the position of all labels in function body
866 and rewrite long conditional branches with a negative label. *)
868 let fixup_cond_branches funbody =
870 (Hashtbl.create 87 : (label, int) Hashtbl.t) in
871 let rec estimate_labels pos i =
875 Hashtbl.add label_position lbl pos; estimate_labels pos i.next
876 | _ -> estimate_labels (pos + sizeof_instr i) i.next in
877 let long_branch currpos lbl =
879 let displ = Hashtbl.find label_position lbl - currpos in
880 (* Branch offset is stored in 12 bits, giving a range of
881 -2048 to +2047. Here, we allow 10% error in estimating
882 the code positions. *)
883 displ < -1843 || displ > 1842
886 let rec fix_branches pos i =
889 | Lcondbranch(tst, lbl) ->
890 if long_branch pos lbl then i.desc <- Lcondbranch(tst, -lbl);
891 fix_branches (pos + sizeof_instr i) i.next
892 | Lcondbranch3(opt1, opt2, opt3) ->
893 let fix_opt = function
895 | Some lbl -> Some(if long_branch pos lbl then -lbl else lbl) in
896 i.desc <- Lcondbranch3(fix_opt opt1, fix_opt opt2, fix_opt opt3);
897 fix_branches (pos + sizeof_instr i) i.next
899 fix_branches (pos + sizeof_instr i) i.next in
900 estimate_labels 0 funbody;
901 fix_branches 0 funbody
903 (* Emission of a function declaration *)
905 let fundecl fundecl =
906 fixup_cond_branches fundecl.fun_body;
907 function_name := fundecl.fun_name;
908 fastcode_flag := fundecl.fun_fast;
909 tailrec_entry_point := new_label();
911 float_constants := [];
912 define_symbol fundecl.fun_name;
913 range_check_trap := 0;
914 let n = frame_size() in
915 begin match Config.system with
919 ` .export {emit_symbol fundecl.fun_name}, entry, priv_lev=3\n`;
920 `{emit_symbol fundecl.fun_name}:\n`;
922 if !contains_calls then
923 ` .callinfo frame={emit_int n}, calls, save_rp\n`
925 ` .callinfo frame={emit_int n}, no_calls\n`;
930 ` .globl {emit_symbol fundecl.fun_name}\n`;
931 `{emit_symbol fundecl.fun_name}:\n`
935 if !contains_calls then
936 ` stwm %r2, {emit_int n}(%r30)\n`
938 ` ldo {emit_int n}(%r30), %r30\n`;
939 `{emit_label !tailrec_entry_point}:\n`;
940 emit_all fundecl.fun_body;
941 if !range_check_trap > 0 then begin
942 `{emit_label !range_check_trap}:\n`;
943 emit_call "caml_ml_array_bound_error" "%r31";
946 if Config.system = "hpux"then begin
950 emit_float_constants()
952 (* Emission of data *)
954 let declare_global s =
956 if Config.system = "hpux"
957 then ` .export {emit_symbol s}, data\n`
958 else ` .globl {emit_symbol s}\n`
960 let emit_item = function
963 | Cdefine_symbol s ->
966 | Cdefine_label lbl ->
967 `{emit_label (lbl + 100000)}:\n`
969 ` .byte {emit_int n}\n`
971 ` .short {emit_int n}\n`
973 ` .long {emit_nativeint n}\n`
975 ` .long {emit_nativeint n}\n`
977 emit_float32_directive ".long" f
979 emit_float64_split_directive ".long" f
980 | Csymbol_address s ->
982 ` .long {emit_symbol s}\n`
983 | Clabel_address lbl ->
984 ` .long {emit_label(lbl + 100000)}\n`
986 emit_string_directive " .ascii " s
989 if Config.system = "hpux"
990 then ` .block {emit_int n}\n`
991 else ` .space {emit_int n}\n`
997 List.iter emit_item l
999 (* Beginning / end of an assembly file *)
1001 let begin_assembly() =
1002 if Config.system = "hpux" then begin
1003 ` .space $PRIVATE$\n`;
1004 ` .subspa $DATA$,quad=1,align=8,access=31\n`;
1005 ` .subspa $BSS$,quad=1,align=8,access=31,zero,sort=82\n`;
1007 ` .subspa $LIT$,quad=0,align=8,access=44\n`;
1008 ` .subspa $CODE$,quad=0,align=8,access=44,code_only\n`;
1009 ` .import $global$, data\n`;
1010 ` .import $$divI, millicode\n`;
1011 ` .import $$remI, millicode\n`
1013 used_symbols := StringSet.empty;
1014 defined_symbols := StringSet.empty;
1015 called_symbols := StringSet.empty;
1016 let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
1018 declare_global lbl_begin;
1019 `{emit_symbol lbl_begin}:\n`;
1020 let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
1022 declare_global lbl_begin;
1023 `{emit_symbol lbl_begin}:\n`
1026 let end_assembly() =
1028 let lbl_end = Compilenv.make_symbol (Some "code_end") in
1029 declare_global lbl_end;
1030 `{emit_symbol lbl_end}:\n`;
1032 let lbl_end = Compilenv.make_symbol (Some "data_end") in
1033 declare_global lbl_end;
1034 `{emit_symbol lbl_end}:\n`;
1036 let lbl = Compilenv.make_symbol (Some "frametable") in
1038 `{emit_symbol lbl}:\n`;
1039 ` .long {emit_int (List.length !frame_descriptors)}\n`;
1040 List.iter emit_frame !frame_descriptors;
1041 frame_descriptors := [];