1 (***********************************************************************)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7 (* Copyright 1998 Institut National de Recherche en Informatique et *)
8 (* en Automatique. All rights reserved. This file is distributed *)
9 (* under the terms of the Q Public License version 1.0. *)
11 (***********************************************************************)
13 (* $Id: emit.mlp 9314 2009-07-15 12:14:39Z xleroy $ *)
15 (* Emission of ARM assembly code *)
27 (* Tradeoff between code size and code speed *)
29 let fastcode_flag = ref true
34 emit_string ".L"; emit_int lbl
39 Emitaux.emit_symbol '$' s
41 (* Output a pseudo-register *)
45 Reg r -> emit_string (register_name r)
46 | _ -> fatal_error "Emit_arm.emit_reg"
48 (* Output the next register after the given pseudo-register *)
52 Reg r -> emit_string (register_name(r + 1))
53 | _ -> fatal_error "Emit_arm.emit_next_reg"
55 (* Layout of the stack frame *)
57 let stack_offset = ref 0
61 4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) +
62 (if !contains_calls then 4 else 0)
64 let slot_offset loc cl =
66 Incoming n -> frame_size() + n
69 then !stack_offset + num_stack_slots.(1) * 8 + n * 4
70 else !stack_offset + n * 8
73 (* Output a stack reference *)
78 let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
79 | _ -> fatal_error "Emit_arm.emit_stack"
81 (* Output an addressing mode *)
83 let emit_addressing addr r n =
86 `[{emit_reg r.(n)}, #{emit_int ofs}]`
88 (* Record live pointers at call points *)
91 { fd_lbl: int; (* Return address *)
92 fd_frame_size: int; (* Size of stack frame *)
93 fd_live_offset: int list } (* Offsets/regs of live addresses *)
95 let frame_descriptors = ref([] : frame_descr list)
97 let record_frame live =
98 let lbl = new_label() in
99 let live_offset = ref [] in
102 {typ = Addr; loc = Reg r} ->
103 live_offset := (r lsl 1) + 1 :: !live_offset
104 | {typ = Addr; loc = Stack s} as reg ->
105 live_offset := slot_offset s (register_class reg) :: !live_offset
110 fd_frame_size = frame_size();
111 fd_live_offset = !live_offset } :: !frame_descriptors;
115 ` .word {emit_label fd.fd_lbl} + 4\n`;
116 ` .short {emit_int fd.fd_frame_size}\n`;
117 ` .short {emit_int (List.length fd.fd_live_offset)}\n`;
120 ` .short {emit_int n}\n`)
124 (* Names of various instructions *)
126 let name_for_comparison = function
127 Isigned Ceq -> "eq" | Isigned Cne -> "ne" | Isigned Cle -> "le"
128 | Isigned Cge -> "ge" | Isigned Clt -> "lt" | Isigned Cgt -> "gt"
129 | Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
130 | Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
132 let name_for_float_comparison cmp neg =
134 Ceq -> if neg then "ne" else "eq"
135 | Cne -> if neg then "eq" else "ne"
136 | Cle -> if neg then "hi" else "ls"
137 | Cge -> if neg then "lt" else "ge"
138 | Clt -> if neg then "pl" else "mi"
139 | Cgt -> if neg then "le" else "gt"
141 let name_for_int_operation = function
150 let name_for_shift_operation = function
156 let name_for_shift_int_operation = function
159 | Ishiftsubrev -> "rsb"
161 let name_for_float_operation = function
168 | Ifloatofint -> "fltd"
169 | Iintoffloat -> "fixz"
172 (* Recognize immediate operands *)
174 (* Immediate operands are 8-bit immediate values, zero-extended, and rotated
175 right by 0, 2, 4, ... 30 bits.
176 We check only with 8-bit values shifted left 0 to 24 bits. *)
178 let rec is_immed n shift =
180 (Nativeint.logand n (Nativeint.shift_left (Nativeint.of_int 0xFF) shift) = n
181 || is_immed n (shift + 2))
183 let is_immediate n = is_immed n 0
185 (* General functional to decompose a non-immediate integer constant
186 into 8-bit chunks shifted left 0 ... 24 bits *)
188 let decompose_intconst n fn =
191 let ninstr = ref 0 in
193 if Nativeint.to_int (Nativeint.shift_right !i !shift) land 3 = 0 then
196 let mask = Nativeint.shift_left 0xFFn !shift in
197 let bits = Nativeint.logand !i mask in
200 i := Nativeint.sub !i bits;
206 (* Emit a non-immediate integer constant *)
208 let emit_complex_intconst r n =
209 let first = ref true in
213 then ` mov {emit_reg r}, #{emit_nativeint bits} @ {emit_nativeint n}\n`
214 else ` add {emit_reg r}, {emit_reg r}, #{emit_nativeint bits}\n`;
217 (* Adjust sp (up or down) by the given byte amount *)
219 let emit_stack_adjustment instr n =
220 if n <= 0 then 0 else
221 decompose_intconst (Nativeint.of_int n)
223 ` {emit_string instr} sp, sp, #{emit_nativeint bits}\n`)
225 (* Adjust alloc_ptr down by the given byte amount *)
227 let emit_alloc_decrement n =
228 decompose_intconst (Nativeint.of_int n)
230 ` sub alloc_ptr, alloc_ptr, #{emit_nativeint bits}\n`)
232 (* Name of current function *)
233 let function_name = ref ""
234 (* Entry point for tail recursive calls *)
235 let tailrec_entry_point = ref 0
236 (* Table of symbols referenced *)
237 let symbol_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
238 (* Table of floating-point literals *)
239 let float_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
240 (* Total space (in word) occupied by pending literals *)
241 let num_literals = ref 0
242 (* True if we've at least one pending float literal *)
243 let pending_float = ref false
245 (* Label a symbol or float constant *)
246 let label_constant tbl s size =
250 let lbl = new_label() in
251 Hashtbl.add tbl s lbl;
252 num_literals := !num_literals + size;
255 (* Emit all pending constants *)
257 let emit_constants () =
260 `{emit_label lbl}: .word {emit_symbol s}\n`)
264 `{emit_label lbl}: .double {emit_string s}\n`)
266 Hashtbl.clear symbol_constants;
267 Hashtbl.clear float_constants;
269 pending_float := false
271 (* Output the assembly code for an instruction *)
276 | Lop(Imove | Ispill | Ireload) ->
277 let src = i.arg.(0) and dst = i.res.(0) in
278 if src.loc = dst.loc then 0 else begin
279 match (src, dst) with
280 {loc = Reg rs; typ = Int|Addr}, {loc = Reg rd; typ = Int|Addr} ->
281 ` mov {emit_reg dst}, {emit_reg src}\n`; 1
282 | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
283 ` mvfd {emit_reg dst}, {emit_reg src}\n`; 1
284 | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Int|Addr} ->
285 ` stfd {emit_reg src}, [sp, #-8]!\n`;
286 ` ldmfd sp!, \{{emit_reg dst}, {emit_next_reg dst}}\n`; 2
287 | {loc = Reg rs; typ = Int|Addr}, {loc = Stack sd} ->
288 ` str {emit_reg src}, {emit_stack dst}\n`; 1
289 | {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
290 ` stfd {emit_reg src}, {emit_stack dst}\n`; 1
291 | {loc = Stack ss; typ = Int|Addr}, {loc = Reg rd} ->
292 ` ldr {emit_reg dst}, {emit_stack src}\n`; 1
293 | {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
294 ` ldfd {emit_reg dst}, {emit_stack src}\n`; 1
298 | Lop(Iconst_int n) ->
300 let nr = Nativeint.lognot n in
301 if is_immediate n then begin
302 ` mov {emit_reg r}, #{emit_nativeint n}\n`; 1
303 end else if is_immediate nr then begin
304 ` mvn {emit_reg r}, #{emit_nativeint nr}\n`; 1
306 emit_complex_intconst r n
307 | Lop(Iconst_float s) ->
308 begin match Int64.bits_of_float (float_of_string s) with
309 | 0x0000_0000_0000_0000L -> (* +0.0 *)
310 ` mvfd {emit_reg i.res.(0)}, #0.0\n`
312 let lbl = label_constant float_constants s 2 in
313 pending_float := true;
314 ` ldfd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string s}\n`
317 | Lop(Iconst_symbol s) ->
318 let lbl = label_constant symbol_constants s 1 in
319 ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_symbol s}\n`; 1
322 `{record_frame i.live} mov pc, {emit_reg i.arg.(0)}\n`; 2
323 | Lop(Icall_imm s) ->
324 `{record_frame i.live} bl {emit_symbol s}\n`; 1
325 | Lop(Itailcall_ind) ->
326 let n = frame_size() in
327 if !contains_calls then
328 ` ldr lr, [sp, #{emit_int (n-4)}]\n`;
329 ignore (emit_stack_adjustment "add" n);
330 ` mov pc, {emit_reg i.arg.(0)}\n`; 3
331 | Lop(Itailcall_imm s) ->
332 if s = !function_name then begin
333 ` b {emit_label !tailrec_entry_point}\n`; 1
335 let n = frame_size() in
336 if !contains_calls then
337 ` ldr lr, [sp, #{emit_int (n-4)}]\n`;
338 ignore (emit_stack_adjustment "add" n);
339 ` b {emit_symbol s}\n`; 3
341 | Lop(Iextcall(s, alloc)) ->
343 let lbl = label_constant symbol_constants s 1 in
344 ` ldr r10, {emit_label lbl} @ {emit_symbol s}\n`;
345 `{record_frame i.live} bl caml_c_call\n`; 2
347 ` bl {emit_symbol s}\n`; 1
349 | Lop(Istackoffset n) ->
352 then emit_stack_adjustment "sub" n
353 else emit_stack_adjustment "add" (-n) in
354 stack_offset := !stack_offset + n;
356 | Lop(Iload(Single, addr)) ->
358 ` ldfs {emit_reg r}, {emit_addressing addr i.arg 0}\n`;
359 ` mvfd {emit_reg r}, {emit_reg r}\n`;
361 | Lop(Iload(size, addr)) ->
365 Byte_unsigned -> "ldrb"
366 | Byte_signed -> "ldrsb"
367 | Sixteen_unsigned -> "ldrh"
368 | Sixteen_signed -> "ldrsh"
369 | Double | Double_u -> "ldfd"
370 | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) -> "ldr" in
371 ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`;
373 | Lop(Istore(Single, addr)) ->
375 ` mvfs f7, {emit_reg r}\n`;
376 ` stfs f7, {emit_addressing addr i.arg 1}\n`;
378 | Lop(Istore(size, addr)) ->
382 Byte_unsigned | Byte_signed -> "strb"
383 | Sixteen_unsigned | Sixteen_signed -> "strh"
384 | Double | Double_u -> "stfd"
385 | _ (* Word | Thirtytwo_signed | Thirtytwo_unsigned *) -> "str" in
386 ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`;
389 if !fastcode_flag then begin
390 ` ldr r10, [alloc_limit, #0]\n`;
391 let ni = emit_alloc_decrement n in
392 ` cmp alloc_ptr, r10\n`;
393 `{record_frame i.live} blcc caml_call_gc\n`;
394 ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
396 end else if n = 8 || n = 12 || n = 16 then begin
397 `{record_frame i.live} bl caml_alloc{emit_int ((n-4)/4)}\n`;
398 ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; 2
400 let nn = Nativeint.of_int n in
402 if is_immediate nn then begin
403 ` mov r10, #{emit_int n}\n`; 1
405 emit_complex_intconst (phys_reg 8 (*r10*)) nn in
406 `{record_frame i.live} bl caml_allocN\n`;
407 ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
410 | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
411 let shift = name_for_shift_operation op in
412 ` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} {emit_reg i.arg.(1)}\n`; 1
413 | Lop(Iintop(Icomp cmp)) ->
414 let comp = name_for_comparison cmp in
415 ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
416 ` mov {emit_reg i.res.(0)}, #0\n`;
417 ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
418 | Lop(Iintop(Icheckbound)) ->
419 ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
420 ` blls caml_ml_array_bound_error\n`; 2
422 let instr = name_for_int_operation op in
423 ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
424 | Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *)
425 let l = Misc.log2 n in
427 ` movs {emit_reg r}, {emit_reg i.arg.(0)}\n`;
429 ` addlt {emit_reg r}, {emit_reg r}, #{emit_int (n-1)}\n`
431 ` addlt {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
432 ` sublt {emit_reg r}, {emit_reg r}, #1\n`
434 ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 4
435 | Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *)
436 let l = Misc.log2 n in
439 let lbl = new_label() in
440 ` cmp {emit_reg a}, #0\n`;
441 ` mov {emit_reg r}, {emit_reg a}, lsl #{emit_int (32-l)}\n`;
442 ` mov {emit_reg r}, {emit_reg r}, lsr #{emit_int (32-l)}\n`;
443 ` bpl {emit_label lbl}\n`;
444 ` cmp {emit_reg r}, #0\n`;
445 ` subne {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
446 `{emit_label lbl}:\n`; 6
447 | Lop(Iintop_imm((Ilsl | Ilsr | Iasr as op), n)) ->
448 let shift = name_for_shift_operation op in
449 ` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} #{emit_int n}\n`; 1
450 | Lop(Iintop_imm(Icomp cmp, n)) ->
451 let comp = name_for_comparison cmp in
452 ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
453 ` mov {emit_reg i.res.(0)}, #0\n`;
454 ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
455 | Lop(Iintop_imm(Icheckbound, n)) ->
456 ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
457 ` blls caml_ml_array_bound_error\n`; 2
458 | Lop(Iintop_imm(op, n)) ->
459 let instr = name_for_int_operation op in
460 ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
461 | Lop(Inegf | Iabsf | Ifloatofint | Iintoffloat as op) ->
462 let instr = name_for_float_operation op in
463 ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1
464 | Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
465 let instr = name_for_float_operation op in
466 ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
467 | Lop(Ispecific(Ishiftarith(op, shift))) ->
468 let instr = name_for_shift_int_operation op in
469 ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
471 then `, lsl #{emit_int shift}\n`
472 else `, asr #{emit_int (-shift)}\n`;
474 | Lop(Ispecific(Ishiftcheckbound shift)) ->
475 ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
476 ` blcs caml_ml_array_bound_error\n`; 2
477 | Lop(Ispecific(Irevsubimm n)) ->
478 ` rsb {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
480 let n = frame_size() in
481 ` ldr lr, [sp, #{emit_int(n-4)}]\n`; 1
483 let n = frame_size() in
484 ignore(emit_stack_adjustment "add" n);
487 `{emit_label lbl}:\n`; 0
489 ` b {emit_label lbl}\n`; 1
490 | Lcondbranch(tst, lbl) ->
493 ` cmp {emit_reg i.arg.(0)}, #0\n`;
494 ` bne {emit_label lbl}\n`
496 ` cmp {emit_reg i.arg.(0)}, #0\n`;
497 ` beq {emit_label lbl}\n`
499 ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
500 let comp = name_for_comparison cmp in
501 ` b{emit_string comp} {emit_label lbl}\n`
502 | Iinttest_imm(cmp, n) ->
503 ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
504 let comp = name_for_comparison cmp in
505 ` b{emit_string comp} {emit_label lbl}\n`
506 | Ifloattest(cmp, neg) ->
509 ` cmf {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
511 ` cmfe {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
513 let comp = name_for_float_comparison cmp neg in
514 ` b{emit_string comp} {emit_label lbl}\n`
516 ` tst {emit_reg i.arg.(0)}, #1\n`;
517 ` bne {emit_label lbl}\n`
519 ` tst {emit_reg i.arg.(0)}, #1\n`;
520 ` beq {emit_label lbl}\n`
523 | Lcondbranch3(lbl0, lbl1, lbl2) ->
524 ` cmp {emit_reg i.arg.(0)}, #1\n`;
525 begin match lbl0 with
527 | Some lbl -> ` blt {emit_label lbl}\n`
529 begin match lbl1 with
531 | Some lbl -> ` beq {emit_label lbl}\n`
533 begin match lbl2 with
535 | Some lbl -> ` bgt {emit_label lbl}\n`
539 ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
540 ` mov r0, r0\n`; (* nop *)
541 for i = 0 to Array.length jumptbl - 1 do
542 ` .word {emit_label jumptbl.(i)}\n`
544 2 + Array.length jumptbl
546 ` bl {emit_label lbl}\n`; 1
548 stack_offset := !stack_offset + 8;
549 ` stmfd sp!, \{trap_ptr, lr}\n`;
550 ` mov trap_ptr, sp\n`; 2
552 ` ldmfd sp!, \{trap_ptr, lr}\n`;
553 stack_offset := !stack_offset - 8; 1
555 ` mov sp, trap_ptr\n`;
556 ` ldmfd sp!, \{trap_ptr, pc}\n`; 2
558 (* Emission of an instruction sequence *)
560 let no_fallthrough = function
561 Lop(Itailcall_ind | Itailcall_imm _) -> true
568 let rec emit_all ninstr i =
569 if i.desc = Lend then () else begin
570 let n = emit_instr i in
571 let ninstr' = ninstr + n in
572 let limit = (if !pending_float then 127 else 511) - !num_literals in
573 if ninstr' >= limit - 32 && no_fallthrough i.desc then begin
577 if ninstr' >= limit then begin
578 let lbl = new_label() in
579 ` b {emit_label lbl}\n`;
581 `{emit_label lbl}:\n`;
584 emit_all ninstr' i.next
587 (* Emission of a function declaration *)
589 let fundecl fundecl =
590 function_name := fundecl.fun_name;
591 fastcode_flag := fundecl.fun_fast;
592 tailrec_entry_point := new_label();
594 Hashtbl.clear symbol_constants;
595 Hashtbl.clear float_constants;
598 ` .global {emit_symbol fundecl.fun_name}\n`;
599 `{emit_symbol fundecl.fun_name}:\n`;
600 let n = frame_size() in
601 ignore(emit_stack_adjustment "sub" n);
602 if !contains_calls then
603 ` str lr, [sp, #{emit_int(n - 4)}]\n`;
604 `{emit_label !tailrec_entry_point}:\n`;
605 emit_all 0 fundecl.fun_body;
608 (* Emission of data *)
610 let emit_item = function
612 ` .global {emit_symbol s}\n`;
613 | Cdefine_symbol s ->
615 | Cdefine_label lbl ->
616 `{emit_label (10000 + lbl)}:\n`
618 ` .byte {emit_int n}\n`
620 ` .short {emit_int n}\n`
622 ` .word {emit_nativeint n}\n`
624 ` .word {emit_nativeint n}\n`
626 ` .float {emit_string f}\n`
628 (* FIXME: this version of the ARM port is mixed-endian, so we
629 use .double instead of emit_float64_directive. The next
630 version is little-endian, so we'll use emit_float64 then. *)
632 ` .double {emit_string f}\n`
633 | Csymbol_address s ->
634 ` .word {emit_symbol s}\n`
635 | Clabel_address lbl ->
636 ` .word {emit_label (10000 + lbl)}\n`
638 emit_string_directive " .ascii " s
640 if n > 0 then ` .space {emit_int n}\n`
642 ` .align {emit_int(Misc.log2 n)}\n`
646 List.iter emit_item l
648 (* Beginning / end of an assembly file *)
650 let begin_assembly() =
651 `trap_ptr .req r11\n`;
652 `alloc_ptr .req r8\n`;
653 `alloc_limit .req r9\n`;
654 let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
656 ` .global {emit_symbol lbl_begin}\n`;
657 `{emit_symbol lbl_begin}:\n`;
658 let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
660 ` .global {emit_symbol lbl_begin}\n`;
661 `{emit_symbol lbl_begin}:\n`
663 let end_assembly () =
664 let lbl_end = Compilenv.make_symbol (Some "code_end") in
666 ` .global {emit_symbol lbl_end}\n`;
667 `{emit_symbol lbl_end}:\n`;
668 let lbl_end = Compilenv.make_symbol (Some "data_end") in
670 ` .global {emit_symbol lbl_end}\n`;
671 `{emit_symbol lbl_end}:\n`;
673 let lbl = Compilenv.make_symbol (Some "frametable") in
675 ` .global {emit_symbol lbl}\n`;
676 `{emit_symbol lbl}:\n`;
677 ` .word {emit_int (List.length !frame_descriptors)}\n`;
678 List.iter emit_frame !frame_descriptors;
679 frame_descriptors := []