l4/pkg/ocaml/contrib/asmcomp/amd64/emit.mlp

   1 (***********************************************************************)
   2 (*                                                                     *)
   3 (*                           Objective Caml                            *)
   4 (*                                                                     *)
   5 (*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
   6 (*                                                                     *)
   7 (*  Copyright 1996 Institut National de Recherche en Informatique et   *)
   8 (*  en Automatique.  All rights reserved.  This file is distributed    *)
   9 (*  under the terms of the Q Public License version 1.0.               *)
  10 (*                                                                     *)
  11 (***********************************************************************)
  12
  13 (* $Id: emit.mlp 9475 2009-12-16 10:04:38Z xleroy $ *)
  14
  15 (* Emission of x86-64 (AMD 64) assembly code *)
  16
  17 open Misc
  18 open Cmm
  19 open Arch
  20 open Proc
  21 open Reg
  22 open Mach
  23 open Linearize
  24 open Emitaux
  25
  26 let macosx =
  27   match Config.system with
  28   | "macosx" -> true
  29   | _ -> false
  30
  31
  32 (* Tradeoff between code size and code speed *)
  33
  34 let fastcode_flag = ref true
  35
  36 let stack_offset = ref 0
  37
  38 (* Layout of the stack frame *)
  39
  40 let frame_required () =
  41   !contains_calls || num_stack_slots.(0) > 0 || num_stack_slots.(1) > 0
  42
  43 let frame_size () =                     (* includes return address *)
  44   if frame_required() then begin
  45     let sz =
  46       (!stack_offset + 8 * (num_stack_slots.(0) + num_stack_slots.(1)) + 8)
  47     in Misc.align sz 16
  48   end else
  49     !stack_offset + 8
  50
  51 let slot_offset loc cl =
  52   match loc with
  53     Incoming n -> frame_size() + n
  54   | Local n ->
  55       if cl = 0
  56       then !stack_offset + n * 8
  57       else !stack_offset + (num_stack_slots.(0) + n) * 8
  58   | Outgoing n -> n
  59
  60 (* Symbols *)
  61
  62 let emit_symbol s =
  63     if macosx then emit_string "_";
  64     Emitaux.emit_symbol '$' s
  65
  66 let emit_call s =
  67   if !Clflags.dlcode && not macosx
  68   then `call    {emit_symbol s}@PLT`
  69   else `call    {emit_symbol s}`
  70
  71 let emit_jump s =
  72   if !Clflags.dlcode && not macosx
  73   then `jmp     {emit_symbol s}@PLT`
  74   else `jmp     {emit_symbol s}`
  75
  76 let load_symbol_addr s =
  77   if !Clflags.dlcode
  78   then `movq    {emit_symbol s}@GOTPCREL(%rip)`
  79   else if !pic_code
  80   then `leaq    {emit_symbol s}(%rip)`
  81   else `movq    ${emit_symbol s}`
  82
  83
  84 (* Output a label *)
  85
  86 let emit_label lbl =
  87   emit_string ".L"; emit_int lbl
  88
  89 (* Output a .align directive. *)
  90
  91 let emit_align n =
  92     let n = if macosx then Misc.log2 n else n in
  93   `     .align  {emit_int n}\n`
  94
  95 let emit_Llabel fallthrough lbl =
  96   if not fallthrough && !fastcode_flag then emit_align 4;
  97   emit_label lbl
  98
  99 (* Output a pseudo-register *)
 100
 101 let emit_reg = function
 102     { loc = Reg r } ->
 103       emit_string (register_name r)
 104   | { loc = Stack s } as r ->
 105       let ofs = slot_offset s (register_class r) in
 106       `{emit_int ofs}(%rsp)`
 107   | { loc = Unknown } ->
 108       assert false
 109
 110 (* Output a reference to the lower 8, 16 or 32 bits of a register *)
 111
 112 let reg_low_8_name =
 113   [| "%al"; "%bl"; "%dil"; "%sil"; "%dl"; "%cl"; "%r8b"; "%r9b";
 114      "%r10b"; "%r11b"; "%bpl"; "%r12b"; "%r13b" |]
 115 let reg_low_16_name =
 116   [| "%ax"; "%bx"; "%di"; "%si"; "%dx"; "%cx"; "%r8w"; "%r9w";
 117      "%r10w"; "%r11w"; "%bp"; "%r12w"; "%r13w" |]
 118 let reg_low_32_name =
 119   [| "%eax"; "%ebx"; "%edi"; "%esi"; "%edx"; "%ecx"; "%r8d"; "%r9d";
 120      "%r10d"; "%r11d"; "%ebp"; "%r12d"; "%r13d" |]
 121
 122 let emit_subreg tbl r =
 123   match r.loc with
 124     Reg r when r < 13 ->
 125       emit_string tbl.(r)
 126   | Stack s ->
 127       let ofs = slot_offset s (register_class r) in
 128       `{emit_int ofs}(%rsp)`
 129   | _ ->
 130       assert false
 131
 132 let emit_reg8 r = emit_subreg reg_low_8_name r
 133 let emit_reg16 r = emit_subreg reg_low_16_name r
 134 let emit_reg32 r = emit_subreg reg_low_32_name r
 135
 136 (* Output an addressing mode *)
 137
 138 let emit_addressing addr r n =
 139   match addr with
 140   | Ibased _ when !Clflags.dlcode -> assert false
 141   | Ibased(s, d) ->
 142       `{emit_symbol s}`;
 143       if d <> 0 then ` + {emit_int d}`;
 144       `(%rip)`
 145   | Iindexed d ->
 146       if d <> 0 then emit_int d;
 147       `({emit_reg r.(n)})`
 148   | Iindexed2 d ->
 149       if d <> 0 then emit_int d;
 150       `({emit_reg r.(n)}, {emit_reg r.(n+1)})`
 151   | Iscaled(2, d) ->
 152       if d <> 0 then emit_int d;
 153       `({emit_reg r.(n)}, {emit_reg r.(n)})`
 154   | Iscaled(scale, d) ->
 155       if d <> 0 then emit_int d;
 156       `(, {emit_reg r.(n)}, {emit_int scale})`
 157   | Iindexed2scaled(scale, d) ->
 158       if d <> 0 then emit_int d;
 159       `({emit_reg r.(n)}, {emit_reg r.(n+1)}, {emit_int scale})`
 160
 161 (* Record live pointers at call points -- see Emitaux *)
 162
 163 let record_frame_label live dbg =
 164   let lbl = new_label() in
 165   let live_offset = ref [] in
 166   Reg.Set.iter
 167     (function
 168         {typ = Addr; loc = Reg r} ->
 169           live_offset := ((r lsl 1) + 1) :: !live_offset
 170       | {typ = Addr; loc = Stack s} as reg ->
 171           live_offset := slot_offset s (register_class reg) :: !live_offset
 172       | _ -> ())
 173     live;
 174   frame_descriptors :=
 175     { fd_lbl = lbl;
 176       fd_frame_size = frame_size();
 177       fd_live_offset = !live_offset;
 178       fd_debuginfo = dbg } :: !frame_descriptors;
 179   lbl
 180
 181 let record_frame live dbg =
 182   let lbl = record_frame_label live dbg in `{emit_label lbl}:\n`
 183
 184 (* Record calls to the GC -- we've moved them out of the way *)
 185
 186 type gc_call =
 187   { gc_lbl: label;                      (* Entry label *)
 188     gc_return_lbl: label;               (* Where to branch after GC *)
 189     gc_frame: label }                   (* Label of frame descriptor *)
 190
 191 let call_gc_sites = ref ([] : gc_call list)
 192
 193 let emit_call_gc gc =
 194   `{emit_label gc.gc_lbl}:      {emit_call "caml_call_gc"}\n`;
 195   `{emit_label gc.gc_frame}:    jmp     {emit_label gc.gc_return_lbl}\n`
 196
 197 (* Record calls to caml_ml_array_bound_error.
 198    In -g mode, we maintain one call to caml_ml_array_bound_error
 199    per bound check site.  Without -g, we can share a single call. *)
 200
 201 type bound_error_call =
 202   { bd_lbl: label;                      (* Entry label *)
 203     bd_frame: label }                   (* Label of frame descriptor *)
 204
 205 let bound_error_sites = ref ([] : bound_error_call list)
 206 let bound_error_call = ref 0
 207
 208 let bound_error_label dbg =
 209   if !Clflags.debug then begin
 210     let lbl_bound_error = new_label() in
 211     let lbl_frame = record_frame_label Reg.Set.empty dbg in
 212     bound_error_sites :=
 213      { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites;
 214    lbl_bound_error
 215  end else begin
 216    if !bound_error_call = 0 then bound_error_call := new_label();
 217    !bound_error_call
 218  end
 219
 220 let emit_call_bound_error bd =
 221   `{emit_label bd.bd_lbl}:      {emit_call "caml_ml_array_bound_error"}\n`;
 222   `{emit_label bd.bd_frame}:\n`
 223
 224 let emit_call_bound_errors () =
 225   List.iter emit_call_bound_error !bound_error_sites;
 226   if !bound_error_call > 0 then
 227     `{emit_label !bound_error_call}:    {emit_call "caml_ml_array_bound_error"}\n`
 228
 229 (* Names for instructions *)
 230
 231 let instr_for_intop = function
 232     Iadd -> "addq"
 233   | Isub -> "subq"
 234   | Imul -> "imulq"
 235   | Iand -> "andq"
 236   | Ior -> "orq"
 237   | Ixor -> "xorq"
 238   | Ilsl -> "salq"
 239   | Ilsr -> "shrq"
 240   | Iasr -> "sarq"
 241   | _ -> assert false
 242
 243 let instr_for_floatop = function
 244     Iaddf -> "addsd"
 245   | Isubf -> "subsd"
 246   | Imulf -> "mulsd"
 247   | Idivf -> "divsd"
 248   | _ -> assert false
 249
 250 let instr_for_floatarithmem = function
 251     Ifloatadd -> "addsd"
 252   | Ifloatsub -> "subsd"
 253   | Ifloatmul -> "mulsd"
 254   | Ifloatdiv -> "divsd"
 255
 256 let name_for_cond_branch = function
 257     Isigned Ceq -> "e"     | Isigned Cne -> "ne"
 258   | Isigned Cle -> "le"     | Isigned Cgt -> "g"
 259   | Isigned Clt -> "l"     | Isigned Cge -> "ge"
 260   | Iunsigned Ceq -> "e"   | Iunsigned Cne -> "ne"
 261   | Iunsigned Cle -> "be"  | Iunsigned Cgt -> "a"
 262   | Iunsigned Clt -> "b"  | Iunsigned Cge -> "ae"
 263
 264 (* Output an = 0 or <> 0 test. *)
 265
 266 let output_test_zero arg =
 267   match arg.loc with
 268     Reg r -> `  testq   {emit_reg arg}, {emit_reg arg}\n`
 269   | _     -> `  cmpq    $0, {emit_reg arg}\n`
 270
 271 (* Output a floating-point compare and branch *)
 272
 273 let emit_float_test cmp neg arg lbl =
 274   begin match cmp with
 275   | Ceq | Cne -> `      ucomisd `
 276   | _         -> `      comisd  `
 277   end;
 278   `{emit_reg arg.(1)}, {emit_reg arg.(0)}\n`;
 279   let (branch_opcode, need_jp) =
 280     match (cmp, neg) with
 281       (Ceq, false) -> ("je", true)
 282     | (Ceq, true)  -> ("jne", true)
 283     | (Cne, false) -> ("jne", true)
 284     | (Cne, true)  -> ("je", true)
 285     | (Clt, false) -> ("jb", true)
 286     | (Clt, true)  -> ("jae", true)
 287     | (Cle, false) -> ("jbe", true)
 288     | (Cle, true)  -> ("ja", true)
 289     | (Cgt, false) -> ("ja", false)
 290     | (Cgt, true)  -> ("jbe", false)
 291     | (Cge, false) -> ("jae", true)
 292     | (Cge, true)  -> ("jb", false) in
 293   let branch_if_not_comparable =
 294     if cmp = Cne then not neg else neg in
 295   if need_jp then
 296     if branch_if_not_comparable then begin
 297       ` jp      {emit_label lbl}\n`;
 298       ` {emit_string branch_opcode}     {emit_label lbl}\n`
 299     end else begin
 300       let next = new_label() in
 301       ` jp      {emit_label next}\n`;
 302       ` {emit_string branch_opcode}     {emit_label lbl}\n`;
 303       `{emit_label next}:\n`
 304     end
 305   else begin
 306     `   {emit_string branch_opcode}     {emit_label lbl}\n`
 307   end
 308
 309 (* Deallocate the stack frame before a return or tail call *)
 310
 311 let output_epilogue () =
 312   if frame_required() then begin
 313     let n = frame_size() - 8 in
 314     `   addq    ${emit_int n}, %rsp\n`
 315   end
 316
 317 (* Output the assembly code for an instruction *)
 318
 319 (* Name of current function *)
 320 let function_name = ref ""
 321 (* Entry point for tail recursive calls *)
 322 let tailrec_entry_point = ref 0
 323
 324 let float_constants = ref ([] : (int * string) list)
 325
 326 let emit_instr fallthrough i =
 327     match i.desc with
 328       Lend -> ()
 329     | Lop(Imove | Ispill | Ireload) ->
 330         let src = i.arg.(0) and dst = i.res.(0) in
 331         if src.loc <> dst.loc then begin
 332           if src.typ = Float then
 333             `   movsd   {emit_reg src}, {emit_reg dst}\n`
 334           else
 335               ` movq    {emit_reg src}, {emit_reg dst}\n`
 336         end
 337     | Lop(Iconst_int n) ->
 338         if n = 0n then begin
 339           match i.res.(0).loc with
 340             Reg n -> `  xorq    {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
 341           | _     -> `  movq    $0, {emit_reg i.res.(0)}\n`
 342         end else if n <= 0x7FFFFFFFn && n >= -0x80000000n then
 343           `     movq    ${emit_nativeint n}, {emit_reg i.res.(0)}\n`
 344         else
 345           `     movabsq ${emit_nativeint n}, {emit_reg i.res.(0)}\n`
 346     | Lop(Iconst_float s) ->
 347         begin match Int64.bits_of_float (float_of_string s) with
 348         | 0x0000_0000_0000_0000L ->       (* +0.0 *)
 349           `     xorpd   {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
 350         | _ ->
 351           let lbl = new_label() in
 352           float_constants := (lbl, s) :: !float_constants;
 353           `     movlpd  {emit_label lbl}(%rip), {emit_reg i.res.(0)}\n`
 354         end
 355     | Lop(Iconst_symbol s) ->
 356         `       {load_symbol_addr s}, {emit_reg i.res.(0)}\n`
 357     | Lop(Icall_ind) ->
 358         `       call    *{emit_reg i.arg.(0)}\n`;
 359         record_frame i.live i.dbg
 360     | Lop(Icall_imm(s)) ->
 361         `       {emit_call s}\n`;
 362         record_frame i.live i.dbg
 363     | Lop(Itailcall_ind) ->
 364         output_epilogue();
 365         `       jmp     *{emit_reg i.arg.(0)}\n`
 366     | Lop(Itailcall_imm s) ->
 367         if s = !function_name then
 368           `     jmp     {emit_label !tailrec_entry_point}\n`
 369         else begin
 370           output_epilogue();
 371           `     {emit_jump s}\n`
 372         end
 373     | Lop(Iextcall(s, alloc)) ->
 374         if alloc then begin
 375           `     {load_symbol_addr s}, %rax\n`;
 376           `     {emit_call "caml_c_call"}\n`;
 377           record_frame i.live i.dbg
 378         end else begin
 379           `     {emit_call s}\n`
 380         end
 381     | Lop(Istackoffset n) ->
 382         if n < 0
 383         then `  addq    ${emit_int(-n)}, %rsp\n`
 384         else `  subq    ${emit_int(n)}, %rsp\n`;
 385         stack_offset := !stack_offset + n
 386     | Lop(Iload(chunk, addr)) ->
 387         let dest = i.res.(0) in
 388         begin match chunk with
 389           | Word ->
 390               ` movq    {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 391           | Byte_unsigned ->
 392               ` movzbq  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 393           | Byte_signed ->
 394               ` movsbq  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 395           | Sixteen_unsigned ->
 396               ` movzwq  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 397           | Sixteen_signed ->
 398               ` movswq  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 399           | Thirtytwo_unsigned ->
 400               ` movl    {emit_addressing addr i.arg 0}, {emit_reg32 dest}\n`
 401           | Thirtytwo_signed ->
 402               ` movslq  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 403           | Single ->
 404             `   cvtss2sd {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 405           | Double | Double_u ->
 406             `   movlpd  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 407         end
 408     | Lop(Istore(chunk, addr)) ->
 409         begin match chunk with
 410           | Word ->
 411             `   movq    {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
 412           | Byte_unsigned | Byte_signed ->
 413             `   movb    {emit_reg8 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
 414           | Sixteen_unsigned | Sixteen_signed ->
 415             `   movw    {emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
 416           | Thirtytwo_signed | Thirtytwo_unsigned ->
 417             `   movl    {emit_reg32 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
 418           | Single ->
 419             `   cvtsd2ss {emit_reg i.arg.(0)}, %xmm15\n`;
 420             `   movss   %xmm15, {emit_addressing addr i.arg 1}\n`
 421           | Double | Double_u ->
 422             `   movlpd  {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
 423         end
 424     | Lop(Ialloc n) ->
 425         if !fastcode_flag then begin
 426           let lbl_redo = new_label() in
 427           `{emit_label lbl_redo}:       subq    ${emit_int n}, %r15\n`;
 428           if !Clflags.dlcode then begin
 429             `   {load_symbol_addr "caml_young_limit"}, %rax\n`;
 430             `   cmpq    (%rax), %r15\n`;
 431           end else
 432             `   cmpq    {emit_symbol "caml_young_limit"}(%rip), %r15\n`;
 433           let lbl_call_gc = new_label() in
 434           let lbl_frame = record_frame_label i.live Debuginfo.none in
 435           `     jb      {emit_label lbl_call_gc}\n`;
 436           `     leaq    8(%r15), {emit_reg i.res.(0)}\n`;
 437           call_gc_sites :=
 438             { gc_lbl = lbl_call_gc;
 439               gc_return_lbl = lbl_redo;
 440               gc_frame = lbl_frame } :: !call_gc_sites
 441         end else begin
 442           begin match n with
 443             16  -> `    {emit_call "caml_alloc1"}\n`
 444           | 24 -> `     {emit_call "caml_alloc2"}\n`
 445           | 32 -> `     {emit_call "caml_alloc3"}\n`
 446           | _  -> `     movq    ${emit_int n}, %rax\n`;
 447                   `     {emit_call "caml_allocN"}\n`
 448           end;
 449           `{record_frame i.live Debuginfo.none} leaq    8(%r15), {emit_reg i.res.(0)}\n`
 450         end
 451     | Lop(Iintop(Icomp cmp)) ->
 452         `       cmpq    {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
 453         let b = name_for_cond_branch cmp in
 454         `       set{emit_string b}      %al\n`;
 455         `       movzbq  %al, {emit_reg i.res.(0)}\n`
 456     | Lop(Iintop_imm(Icomp cmp, n)) ->
 457         `       cmpq    ${emit_int n}, {emit_reg i.arg.(0)}\n`;
 458         let b = name_for_cond_branch cmp in
 459         `       set{emit_string b}      %al\n`;
 460         `       movzbq  %al, {emit_reg i.res.(0)}\n`
 461     | Lop(Iintop Icheckbound) ->
 462         let lbl = bound_error_label i.dbg in
 463         `       cmpq    {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
 464         `       jbe     {emit_label lbl}\n`
 465     | Lop(Iintop_imm(Icheckbound, n)) ->
 466         let lbl = bound_error_label i.dbg in
 467         `       cmpq    ${emit_int n}, {emit_reg i.arg.(0)}\n`;
 468         `       jbe     {emit_label lbl}\n`
 469     | Lop(Iintop(Idiv | Imod)) ->
 470         `       cqto\n`;
 471         `       idivq   {emit_reg i.arg.(1)}\n`
 472     | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
 473         (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %rcx *)
 474         `       {emit_string(instr_for_intop op)}       %cl, {emit_reg i.res.(0)}\n`
 475     | Lop(Iintop op) ->
 476         (* We have i.arg.(0) = i.res.(0) *)
 477         `       {emit_string(instr_for_intop op)}       {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
 478     | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
 479         `       leaq    {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
 480     | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
 481         `       incq    {emit_reg i.res.(0)}\n`
 482     | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
 483         `       decq    {emit_reg i.res.(0)}\n`
 484     | Lop(Iintop_imm(Idiv, n)) ->
 485         (* Note: i.arg.(0) = i.res.(0) = rdx  (cf. selection.ml) *)
 486         let l = Misc.log2 n in
 487         `       movq    {emit_reg i.arg.(0)}, %rax\n`;
 488         `       addq    ${emit_int(n-1)}, {emit_reg i.arg.(0)}\n`;
 489         `       testq   %rax, %rax\n`;
 490         `       cmovns  %rax, {emit_reg i.arg.(0)}\n`;
 491         `       sarq    ${emit_int l}, {emit_reg i.res.(0)}\n`
 492     | Lop(Iintop_imm(Imod, n)) ->
 493         (* Note: i.arg.(0) = i.res.(0) = rdx  (cf. selection.ml) *)
 494         `       movq    {emit_reg i.arg.(0)}, %rax\n`;
 495         `       testq   %rax, %rax\n`;
 496         `       leaq    {emit_int(n-1)}(%rax), %rax\n`;
 497         `       cmovns  {emit_reg i.arg.(0)}, %rax\n`;
 498         `       andq    ${emit_int (-n)}, %rax\n`;
 499         `       subq    %rax, {emit_reg i.res.(0)}\n`
 500     | Lop(Iintop_imm(op, n)) ->
 501         (* We have i.arg.(0) = i.res.(0) *)
 502         `       {emit_string(instr_for_intop op)}       ${emit_int n}, {emit_reg i.res.(0)}\n`
 503     | Lop(Inegf) ->
 504         `       xorpd   {emit_symbol "caml_negf_mask"}(%rip), {emit_reg i.res.(0)}\n`
 505     | Lop(Iabsf) ->
 506         `       andpd   {emit_symbol "caml_absf_mask"}(%rip), {emit_reg i.res.(0)}\n`
 507     | Lop(Iaddf | Isubf | Imulf | Idivf as floatop) ->
 508         `       {emit_string(instr_for_floatop floatop)}        {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
 509     | Lop(Ifloatofint) ->
 510         `       cvtsi2sdq       {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
 511     | Lop(Iintoffloat) ->
 512         `       cvttsd2siq      {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
 513     | Lop(Ispecific(Ilea addr)) ->
 514         `       leaq    {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
 515     | Lop(Ispecific(Istore_int(n, addr))) ->
 516         `       movq    ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
 517     | Lop(Ispecific(Istore_symbol(s, addr))) ->
 518         assert (not !pic_code && not !Clflags.dlcode);
 519         `       movq    ${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
 520     | Lop(Ispecific(Ioffset_loc(n, addr))) ->
 521         `       addq    ${emit_int n}, {emit_addressing addr i.arg 0}\n`
 522     | Lop(Ispecific(Ifloatarithmem(op, addr))) ->
 523         `       {emit_string(instr_for_floatarithmem op)}       {emit_addressing addr i.arg 1}, {emit_reg i.res.(0)}\n`
 524     | Lreloadretaddr ->
 525         ()
 526     | Lreturn ->
 527         output_epilogue();
 528         `       ret\n`
 529     | Llabel lbl ->
 530         `{emit_Llabel fallthrough lbl}:\n`
 531     | Lbranch lbl ->
 532         `       jmp     {emit_label lbl}\n`
 533     | Lcondbranch(tst, lbl) ->
 534         begin match tst with
 535           Itruetest ->
 536             output_test_zero i.arg.(0);
 537             `   jne     {emit_label lbl}\n`
 538         | Ifalsetest ->
 539             output_test_zero i.arg.(0);
 540             `   je      {emit_label lbl}\n`
 541         | Iinttest cmp ->
 542             `   cmpq    {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
 543             let b = name_for_cond_branch cmp in
 544             `   j{emit_string b}        {emit_label lbl}\n`
 545         | Iinttest_imm((Isigned Ceq | Isigned Cne |
 546                         Iunsigned Ceq | Iunsigned Cne) as cmp, 0) ->
 547             output_test_zero i.arg.(0);
 548             let b = name_for_cond_branch cmp in
 549             `   j{emit_string b}        {emit_label lbl}\n`
 550         | Iinttest_imm(cmp, n) ->
 551             `   cmpq    ${emit_int n}, {emit_reg i.arg.(0)}\n`;
 552             let b = name_for_cond_branch cmp in
 553             `   j{emit_string b}        {emit_label lbl}\n`
 554         | Ifloattest(cmp, neg) ->
 555             emit_float_test cmp neg i.arg lbl
 556         | Ioddtest ->
 557             `   testb   $1, {emit_reg8 i.arg.(0)}\n`;
 558             `   jne     {emit_label lbl}\n`
 559         | Ieventest ->
 560             `   testb   $1, {emit_reg8 i.arg.(0)}\n`;
 561             `   je      {emit_label lbl}\n`
 562         end
 563     | Lcondbranch3(lbl0, lbl1, lbl2) ->
 564             `   cmpq    $1, {emit_reg i.arg.(0)}\n`;
 565             begin match lbl0 with
 566               None -> ()
 567             | Some lbl -> `     jb      {emit_label lbl}\n`
 568             end;
 569             begin match lbl1 with
 570               None -> ()
 571             | Some lbl -> `     je      {emit_label lbl}\n`
 572             end;
 573             begin match lbl2 with
 574               None -> ()
 575             | Some lbl -> `     jg      {emit_label lbl}\n`
 576             end
 577     | Lswitch jumptbl ->
 578         let lbl = new_label() in
 579         (* rax and rdx are clobbered by the Lswitch,
 580            meaning that no variable that is live across the Lswitch
 581            is assigned to rax or rdx.  However, the argument to Lswitch
 582            can still be assigned to one of these two registers, so
 583            we must be careful not to clobber it before use. *)
 584         let (tmp1, tmp2) =
 585           if i.arg.(0).loc = Reg 0 (* rax *)
 586           then (phys_reg 4 (*rdx*), phys_reg 0 (*rax*))
 587           else (phys_reg 0 (*rax*), phys_reg 4 (*rdx*)) in
 588         `       leaq    {emit_label lbl}(%rip), {emit_reg tmp1}\n`;
 589         `       movslq  ({emit_reg tmp1}, {emit_reg i.arg.(0)}, 4), {emit_reg tmp2}\n`;
 590         `       addq    {emit_reg tmp2}, {emit_reg tmp1}\n`;
 591         `       jmp     *{emit_reg tmp1}\n`;
 592         if macosx
 593         then `  .const\n`
 594         else `  .section .rodata\n`;
 595         emit_align 4;
 596         `{emit_label lbl}:`;
 597         for i = 0 to Array.length jumptbl - 1 do
 598           `     .long   {emit_label jumptbl.(i)} - {emit_label lbl}\n`
 599         done;
 600         `       .text\n`
 601     | Lsetuptrap lbl ->
 602         `       call    {emit_label lbl}\n`
 603     | Lpushtrap ->
 604         `       pushq   %r14\n`;
 605         `       movq    %rsp, %r14\n`;
 606         stack_offset := !stack_offset + 16
 607     | Lpoptrap ->
 608         `       popq    %r14\n`;
 609         `       addq    $8, %rsp\n`;
 610         stack_offset := !stack_offset - 16
 611     | Lraise ->
 612         if !Clflags.debug then begin
 613           `     {emit_call "caml_raise_exn"}\n`;
 614           record_frame Reg.Set.empty i.dbg
 615         end else begin
 616           `     movq    %r14, %rsp\n`;
 617           `     popq    %r14\n`;
 618           `     ret\n`
 619         end
 620
 621 let rec emit_all fallthrough i =
 622   match i.desc with
 623   |  Lend -> ()
 624   | _ ->
 625       emit_instr fallthrough i;
 626       emit_all (Linearize.has_fallthrough i.desc) i.next
 627
 628 (* Emission of the floating-point constants *)
 629
 630 let emit_float_constant (lbl, cst) =
 631   `{emit_label lbl}:`;
 632   emit_float64_directive ".quad" cst
 633
 634 (* Emission of the profiling prelude *)
 635
 636 let emit_profile () =
 637   match Config.system with
 638   | "linux" | "gnu" ->
 639       (* mcount preserves rax, rcx, rdx, rsi, rdi, r8, r9 explicitly
 640          and rbx, rbp, r12-r15 like all C functions.
 641          We need to preserve r10 and r11 ourselves, since Caml can
 642          use them for argument passing. *)
 643       ` pushq   %r10\n`;
 644       ` movq    %rsp, %rbp\n`;
 645       ` pushq   %r11\n`;
 646       ` {emit_call "mcount"}\n`;
 647       ` popq    %r11\n`;
 648       ` popq    %r10\n`
 649   | _ ->
 650       () (*unsupported yet*)
 651
 652 (* Emission of a function declaration *)
 653
 654 let fundecl fundecl =
 655   function_name := fundecl.fun_name;
 656   fastcode_flag := fundecl.fun_fast;
 657   tailrec_entry_point := new_label();
 658   stack_offset := 0;
 659   float_constants := [];
 660   call_gc_sites := [];
 661   bound_error_sites := [];
 662   bound_error_call := 0;
 663   `     .text\n`;
 664   emit_align 16;
 665   if macosx
 666   && not !Clflags.output_c_object
 667   && is_generic_function fundecl.fun_name
 668   then (* PR#4690 *)
 669     `   .private_extern {emit_symbol fundecl.fun_name}\n`
 670   else
 671     `   .globl  {emit_symbol fundecl.fun_name}\n`;
 672   `{emit_symbol fundecl.fun_name}:\n`;
 673   if !Clflags.gprofile then emit_profile();
 674   if frame_required() then begin
 675     let n = frame_size() - 8 in
 676     `   subq    ${emit_int n}, %rsp\n`
 677   end;
 678   `{emit_label !tailrec_entry_point}:\n`;
 679   emit_all true fundecl.fun_body;
 680   List.iter emit_call_gc !call_gc_sites;
 681   emit_call_bound_errors ();
 682   if !float_constants <> [] then begin
 683     if macosx
 684     then `      .literal8\n`
 685     else `      .section        .rodata.cst8,\"a\",@progbits\n`;
 686     List.iter emit_float_constant !float_constants
 687   end;
 688   match Config.system with
 689     "linux" | "gnu" ->
 690       ` .type   {emit_symbol fundecl.fun_name},@function\n`;
 691       ` .size   {emit_symbol fundecl.fun_name},.-{emit_symbol fundecl.fun_name}\n`
 692   | _ -> ()
 693
 694 (* Emission of data *)
 695
 696 let emit_item = function
 697     Cglobal_symbol s ->
 698       ` .globl  {emit_symbol s}\n`;
 699   | Cdefine_symbol s ->
 700       `{emit_symbol s}:\n`
 701   | Cdefine_label lbl ->
 702       `{emit_label (100000 + lbl)}:\n`
 703   | Cint8 n ->
 704       ` .byte   {emit_int n}\n`
 705   | Cint16 n ->
 706       ` .word   {emit_int n}\n`
 707   | Cint32 n ->
 708       ` .long   {emit_nativeint n}\n`
 709   | Cint n ->
 710       ` .quad   {emit_nativeint n}\n`
 711   | Csingle f ->
 712       emit_float32_directive ".long" f
 713   | Cdouble f ->
 714       emit_float64_directive ".quad" f
 715   | Csymbol_address s ->
 716       ` .quad   {emit_symbol s}\n`
 717   | Clabel_address lbl ->
 718       ` .quad   {emit_label (100000 + lbl)}\n`
 719   | Cstring s ->
 720       emit_string_directive "   .ascii  " s
 721   | Cskip n ->
 722       if n > 0 then `   .space  {emit_int n}\n`
 723   | Calign n ->
 724       emit_align n
 725
 726 let data l =
 727   `     .data\n`;
 728   List.iter emit_item l
 729
 730 (* Beginning / end of an assembly file *)
 731
 732 let begin_assembly() =
 733   if !Clflags.dlcode then begin
 734     (* from amd64.S; could emit these constants on demand *)
 735     if macosx then begin
 736         `       .literal16\n`;
 737         `       .align  4\n`;
 738         `caml_negf_mask:        .quad   0x8000000000000000, 0\n`;
 739         `       .align  4\n`;
 740         `caml_absf_mask:        .quad   0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF\n`;
 741     end else begin
 742         `       .section        .rodata.cst8,\"a\",@progbits\n`;
 743         `       .align  16\n`;
 744         `caml_negf_mask:        .quad   0x8000000000000000, 0\n`;
 745         `       .align  16\n`;
 746         `caml_absf_mask:        .quad   0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF\n`;
 747     end;
 748   end;
 749   let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
 750   `     .data\n`;
 751   `     .globl  {emit_symbol lbl_begin}\n`;
 752   `{emit_symbol lbl_begin}:\n`;
 753   let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
 754   `     .text\n`;
 755   `     .globl  {emit_symbol lbl_begin}\n`;
 756   `{emit_symbol lbl_begin}:\n`;
 757   if macosx then `      nop\n` (* PR#4690 *)
 758
 759 let end_assembly() =
 760   let lbl_end = Compilenv.make_symbol (Some "code_end") in
 761   `     .text\n`;
 762   if macosx then `      nop\n`; (* suppress "ld warning: atom sorting error" *)
 763   `     .globl  {emit_symbol lbl_end}\n`;
 764   `{emit_symbol lbl_end}:\n`;
 765   `     .data\n`;
 766   let lbl_end = Compilenv.make_symbol (Some "data_end") in
 767   `     .globl  {emit_symbol lbl_end}\n`;
 768   `{emit_symbol lbl_end}:\n`;
 769   `     .long   0\n`;
 770   let lbl = Compilenv.make_symbol (Some "frametable") in
 771   `     .globl  {emit_symbol lbl}\n`;
 772   `{emit_symbol lbl}:\n`;
 773   emit_frames
 774     { efa_label = (fun l -> `   .quad   {emit_label l}\n`);
 775       efa_16 = (fun n -> `      .word   {emit_int n}\n`);
 776       efa_32 = (fun n -> `      .long   {emit_int32 n}\n`);
 777       efa_word = (fun n -> `    .quad   {emit_int n}\n`);
 778       efa_align = emit_align;
 779       efa_label_rel =
 780         if macosx then begin
 781           let setcnt = ref 0 in
 782           fun lbl ofs ->
 783             incr setcnt;
 784             `   .set    L$set${emit_int !setcnt}, ({emit_label lbl} - .) + {emit_int32 ofs}\n`;
 785             `   .long L$set${emit_int !setcnt}\n`
 786         end else begin
 787           fun lbl ofs ->
 788              `  .long   ({emit_label lbl} - .) + {emit_int32 ofs}\n`
 789         end;
 790       efa_def_label = (fun l -> `{emit_label l}:\n`);
 791       efa_string = (fun s -> emit_string_directive "    .asciz  " s) };
 792   if Config.system = "linux" then
 793     (* Mark stack as non-executable, PR#4564 *)
 794     `   .section .note.GNU-stack,\"\",%progbits\n`
 795