l4/pkg/ocaml/contrib/asmcomp/i386/emit.mlp

   1 (***********************************************************************)
   2 (*                                                                     *)
   3 (*                           Objective Caml                            *)
   4 (*                                                                     *)
   5 (*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
   6 (*                                                                     *)
   7 (*  Copyright 1996 Institut National de Recherche en Informatique et   *)
   8 (*  en Automatique.  All rights reserved.  This file is distributed    *)
   9 (*  under the terms of the Q Public License version 1.0.               *)
  10 (*                                                                     *)
  11 (***********************************************************************)
  12
  13 (* $Id: emit.mlp 9475 2009-12-16 10:04:38Z xleroy $ *)
  14
  15 (* Emission of Intel 386 assembly code *)
  16
  17 module StringSet = Set.Make(struct type t = string let compare = compare end)
  18
  19 open Location
  20 open Misc
  21 open Cmm
  22 open Arch
  23 open Proc
  24 open Reg
  25 open Mach
  26 open Linearize
  27 open Emitaux
  28
  29 (* Tradeoff between code size and code speed *)
  30
  31 let fastcode_flag = ref true
  32
  33 let stack_offset = ref 0
  34
  35 (* Layout of the stack frame *)
  36
  37 let frame_size () =                     (* includes return address *)
  38   let sz =
  39     !stack_offset + 4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + 4
  40   in Misc.align sz stack_alignment
  41
  42 let slot_offset loc cl =
  43   match loc with
  44     Incoming n ->
  45       assert (n >= 0);
  46       frame_size() + n
  47   | Local n ->
  48       if cl = 0
  49       then !stack_offset + n * 4
  50       else !stack_offset + num_stack_slots.(0) * 4 + n * 8
  51   | Outgoing n ->
  52       assert (n >= 0);
  53       n
  54
  55 let trap_frame_size = Misc.align 8 stack_alignment
  56
  57 (* Prefixing of symbols with "_" *)
  58
  59 let symbol_prefix =
  60   match Config.system with
  61     "linux_elf" -> ""
  62   | "bsd_elf" -> ""
  63   | "solaris" -> ""
  64   | "beos" -> ""
  65   | "gnu" -> ""
  66   | _ -> "_"
  67
  68 let emit_symbol s =
  69   emit_string symbol_prefix; Emitaux.emit_symbol '$' s
  70
  71 (* Output a label *)
  72
  73 let label_prefix =
  74   match Config.system with
  75     "linux_elf" -> ".L"
  76   | "bsd_elf" -> ".L"
  77   | "solaris" -> ".L"
  78   | "beos" -> ".L"
  79   | "gnu" -> ".L"
  80   | _ -> "L"
  81
  82 let emit_label lbl =
  83   emit_string label_prefix; emit_int lbl
  84
  85
  86 (* Some data directives have different names under Solaris *)
  87
  88 let word_dir =
  89   match Config.system with
  90     "solaris" -> ".value"
  91   | _ -> ".word"
  92 let skip_dir =
  93   match Config.system with
  94     "solaris" -> ".zero"
  95   | _ -> ".space"
  96 let use_ascii_dir =
  97   match Config.system with
  98     "solaris" -> false
  99   | _ -> true
 100
 101 (* MacOSX has its own way to reference symbols potentially defined in
 102    shared objects *)
 103
 104 let macosx =
 105   match Config.system with
 106   | "macosx" -> true
 107   | _ -> false
 108
 109 (* Output a .align directive.
 110    The numerical argument to .align is log2 of alignment size, except
 111    under ELF, where it is the alignment size... *)
 112
 113 let emit_align =
 114   match Config.system with
 115     "linux_elf" | "bsd_elf" | "solaris" | "beos" | "cygwin" | "mingw" | "gnu" ->
 116       (fun n -> `       .align  {emit_int n}\n`)
 117   | _ ->
 118       (fun n -> `       .align  {emit_int(Misc.log2 n)}\n`)
 119
 120 let emit_Llabel fallthrough lbl =
 121   if not fallthrough && !fastcode_flag then
 122     emit_align 16 ;
 123   emit_label lbl
 124
 125 (* Output a pseudo-register *)
 126
 127 let emit_reg = function
 128     { loc = Reg r } ->
 129       emit_string (register_name r)
 130   | { loc = Stack(Incoming n | Outgoing n) } when n < 0 ->
 131       `{emit_symbol "caml_extra_params"} + {emit_int (n + 64)}`
 132   | { loc = Stack s } as r ->
 133       let ofs = slot_offset s (register_class r) in
 134       `{emit_int ofs}(%esp)`
 135   | { loc = Unknown } ->
 136       fatal_error "Emit_i386.emit_reg"
 137
 138 (* Output a reference to the lower 8 bits or lower 16 bits of a register *)
 139
 140 let reg_low_byte_name = [| "%al"; "%bl"; "%cl"; "%dl" |]
 141 let reg_low_half_name = [| "%ax"; "%bx"; "%cx"; "%dx"; "%si"; "%di"; "%bp" |]
 142
 143 let emit_reg8 r =
 144   match r.loc with
 145     Reg r when r < 4 -> emit_string (reg_low_byte_name.(r))
 146   | _ -> fatal_error "Emit_i386.emit_reg8"
 147
 148 let emit_reg16 r =
 149   match r.loc with
 150     Reg r when r < 7 -> emit_string (reg_low_half_name.(r))
 151   | _ -> fatal_error "Emit_i386.emit_reg16"
 152
 153 (* Output an addressing mode *)
 154
 155 let emit_addressing addr r n =
 156   match addr with
 157     Ibased(s, d) ->
 158       `{emit_symbol s}`;
 159       if d <> 0 then ` + {emit_int d}`
 160   | Iindexed d ->
 161       if d <> 0 then emit_int d;
 162       `({emit_reg r.(n)})`
 163   | Iindexed2 d ->
 164       if d <> 0 then emit_int d;
 165       `({emit_reg r.(n)}, {emit_reg r.(n+1)})`
 166   | Iscaled(2, d) ->
 167       if d <> 0 then emit_int d;
 168       `({emit_reg r.(n)}, {emit_reg r.(n)})`
 169   | Iscaled(scale, d) ->
 170       if d <> 0 then emit_int d;
 171       `(, {emit_reg r.(n)}, {emit_int scale})`
 172   | Iindexed2scaled(scale, d) ->
 173       if d <> 0 then emit_int d;
 174       `({emit_reg r.(n)}, {emit_reg r.(n+1)}, {emit_int scale})`
 175
 176 (* Record live pointers at call points *)
 177
 178 let record_frame_label live dbg =
 179   let lbl = new_label() in
 180   let live_offset = ref [] in
 181   Reg.Set.iter
 182     (function
 183         {typ = Addr; loc = Reg r} ->
 184           live_offset := ((r lsl 1) + 1) :: !live_offset
 185       | {typ = Addr; loc = Stack s} as reg ->
 186           live_offset := slot_offset s (register_class reg) :: !live_offset
 187       | _ -> ())
 188     live;
 189   frame_descriptors :=
 190     { fd_lbl = lbl;
 191       fd_frame_size = frame_size();
 192       fd_live_offset = !live_offset;
 193       fd_debuginfo = dbg } :: !frame_descriptors;
 194   lbl
 195
 196 let record_frame live dbg =
 197   let lbl = record_frame_label live dbg in `{emit_label lbl}:\n`
 198
 199 (* Record calls to the GC -- we've moved them out of the way *)
 200
 201 type gc_call =
 202   { gc_lbl: label;                      (* Entry label *)
 203     gc_return_lbl: label;               (* Where to branch after GC *)
 204     gc_frame: label }                   (* Label of frame descriptor *)
 205
 206 let call_gc_sites = ref ([] : gc_call list)
 207
 208 let emit_call_gc gc =
 209   `{emit_label gc.gc_lbl}:      call    {emit_symbol "caml_call_gc"}\n`;
 210   `{emit_label gc.gc_frame}:    jmp     {emit_label gc.gc_return_lbl}\n`
 211
 212 (* Record calls to caml_ml_array_bound_error.
 213    In -g mode, we maintain one call to caml_ml_array_bound_error
 214    per bound check site.  Without -g, we can share a single call. *)
 215
 216 type bound_error_call =
 217   { bd_lbl: label;                      (* Entry label *)
 218     bd_frame: label }                   (* Label of frame descriptor *)
 219
 220 let bound_error_sites = ref ([] : bound_error_call list)
 221 let bound_error_call = ref 0
 222
 223 let bound_error_label dbg =
 224   if !Clflags.debug then begin
 225     let lbl_bound_error = new_label() in
 226     let lbl_frame = record_frame_label Reg.Set.empty dbg in
 227     bound_error_sites :=
 228      { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites;
 229    lbl_bound_error
 230  end else begin
 231    if !bound_error_call = 0 then bound_error_call := new_label();
 232    !bound_error_call
 233  end
 234
 235 let emit_call_bound_error bd =
 236   `{emit_label bd.bd_lbl}:      call    {emit_symbol "caml_ml_array_bound_error"}\n`;
 237   `{emit_label bd.bd_frame}:\n`
 238
 239 let emit_call_bound_errors () =
 240   List.iter emit_call_bound_error !bound_error_sites;
 241   if !bound_error_call > 0 then
 242     `{emit_label !bound_error_call}:    call    {emit_symbol "caml_ml_array_bound_error"}\n`
 243
 244 (* Names for instructions *)
 245
 246 let instr_for_intop = function
 247     Iadd -> "addl"
 248   | Isub -> "subl"
 249   | Imul -> "imull"
 250   | Iand -> "andl"
 251   | Ior -> "orl"
 252   | Ixor -> "xorl"
 253   | Ilsl -> "sall"
 254   | Ilsr -> "shrl"
 255   | Iasr -> "sarl"
 256   | _ -> fatal_error "Emit_i386: instr_for_intop"
 257
 258 let instr_for_floatop = function
 259     Inegf -> "fchs"
 260   | Iabsf -> "fabs"
 261   | Iaddf -> "faddl"
 262   | Isubf -> "fsubl"
 263   | Imulf -> "fmull"
 264   | Idivf -> "fdivl"
 265   | Ispecific Isubfrev -> "fsubrl"
 266   | Ispecific Idivfrev -> "fdivrl"
 267   | _ -> fatal_error "Emit_i386: instr_for_floatop"
 268
 269 let instr_for_floatop_reversed = function
 270     Iaddf -> "faddl"
 271   | Isubf -> "fsubrl"
 272   | Imulf -> "fmull"
 273   | Idivf -> "fdivrl"
 274   | Ispecific Isubfrev -> "fsubl"
 275   | Ispecific Idivfrev -> "fdivl"
 276   | _ -> fatal_error "Emit_i386: instr_for_floatop_reversed"
 277
 278 let instr_for_floatop_pop = function
 279     Iaddf -> "faddp"
 280   | Isubf -> "fsubp"
 281   | Imulf -> "fmulp"
 282   | Idivf -> "fdivp"
 283   | Ispecific Isubfrev -> "fsubrp"
 284   | Ispecific Idivfrev -> "fdivrp"
 285   | _ -> fatal_error "Emit_i386: instr_for_floatop_pop"
 286
 287 let instr_for_floatarithmem double = function
 288     Ifloatadd -> if double then "faddl" else "fadds"
 289   | Ifloatsub -> if double then "fsubl" else "fsubs"
 290   | Ifloatsubrev -> if double then "fsubrl" else "fsubrs"
 291   | Ifloatmul -> if double then "fmull" else "fmuls"
 292   | Ifloatdiv -> if double then "fdivl" else "fdivs"
 293   | Ifloatdivrev -> if double then "fdivrl" else "fdivrs"
 294
 295 let name_for_cond_branch = function
 296     Isigned Ceq -> "e"     | Isigned Cne -> "ne"
 297   | Isigned Cle -> "le"     | Isigned Cgt -> "g"
 298   | Isigned Clt -> "l"     | Isigned Cge -> "ge"
 299   | Iunsigned Ceq -> "e"   | Iunsigned Cne -> "ne"
 300   | Iunsigned Cle -> "be"  | Iunsigned Cgt -> "a"
 301   | Iunsigned Clt -> "b"  | Iunsigned Cge -> "ae"
 302
 303 (* Output an = 0 or <> 0 test. *)
 304
 305 let output_test_zero arg =
 306   match arg.loc with
 307     Reg r -> `  testl   {emit_reg arg}, {emit_reg arg}\n`
 308   | _     -> `  cmpl    $0, {emit_reg arg}\n`
 309
 310 (* Deallocate the stack frame before a return or tail call *)
 311
 312 let output_epilogue () =
 313   let n = frame_size() - 4 in
 314   if n > 0 then `       addl    ${emit_int n}, %esp\n`
 315
 316 (* Determine if the given register is the top of the floating-point stack *)
 317
 318 let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
 319
 320 (* Emit the code for a floating-point comparison *)
 321
 322 let emit_float_test cmp neg arg lbl =
 323   let actual_cmp =
 324     match (is_tos arg.(0), is_tos arg.(1)) with
 325       (true, true) ->
 326       (* both args on top of FP stack *)
 327       ` fcompp\n`;
 328       cmp
 329     | (true, false) ->
 330       (* first arg on top of FP stack *)
 331       ` fcompl  {emit_reg arg.(1)}\n`;
 332       cmp
 333     | (false, true) ->
 334       (* second arg on top of FP stack *)
 335       ` fcompl  {emit_reg arg.(0)}\n`;
 336       Cmm.swap_comparison cmp
 337     | (false, false) ->
 338       ` fldl    {emit_reg arg.(0)}\n`;
 339       ` fcompl  {emit_reg arg.(1)}\n`;
 340       cmp
 341     in
 342   `     fnstsw  %ax\n`;
 343   begin match actual_cmp with
 344     Ceq ->
 345       if neg then begin
 346       ` andb    $68, %ah\n`;
 347       ` xorb    $64, %ah\n`;
 348       ` jne     `
 349       end else begin
 350       ` andb    $69, %ah\n`;
 351       ` cmpb    $64, %ah\n`;
 352       ` je      `
 353       end
 354   | Cne ->
 355       if neg then begin
 356       ` andb    $69, %ah\n`;
 357       ` cmpb    $64, %ah\n`;
 358       ` je      `
 359       end else begin
 360       ` andb    $68, %ah\n`;
 361       ` xorb    $64, %ah\n`;
 362       ` jne     `
 363       end
 364   | Cle ->
 365       ` andb    $69, %ah\n`;
 366       ` decb    %ah\n`;
 367       ` cmpb    $64, %ah\n`;
 368       if neg
 369       then `    jae     `
 370       else `    jb      `
 371   | Cge ->
 372       ` andb    $5, %ah\n`;
 373       if neg
 374       then `    jne     `
 375       else `    je      `
 376   | Clt ->
 377       ` andb    $69, %ah\n`;
 378       ` cmpb    $1, %ah\n`;
 379       if neg
 380       then `    jne     `
 381       else `    je      `
 382   | Cgt ->
 383       ` andb    $69, %ah\n`;
 384       if neg
 385       then `    jne     `
 386       else `    je      `
 387   end;
 388   `{emit_label lbl}\n`
 389
 390 (* Emit a Ifloatspecial instruction *)
 391
 392 let emit_floatspecial = function
 393     "atan"  -> `        fld1; fpatan\n`
 394   | "atan2" -> `        fpatan\n`
 395   | "cos"   -> `        fcos\n`
 396   | "log"   -> `        fldln2; fxch; fyl2x\n`
 397   | "log10" -> `        fldlg2; fxch; fyl2x\n`
 398   | "sin"   -> `        fsin\n`
 399   | "sqrt"  -> `        fsqrt\n`
 400   | "tan"   -> `        fptan; fstp %st(0)\n`
 401   | _ -> assert false
 402
 403 (* Output the assembly code for an instruction *)
 404
 405 (* Name of current function *)
 406 let function_name = ref ""
 407 (* Entry point for tail recursive calls *)
 408 let tailrec_entry_point = ref 0
 409 (* Label of trap for out-of-range accesses *)
 410 let range_check_trap = ref 0
 411 (* Record float literals to be emitted later *)
 412 let float_constants = ref ([] : (int * string) list)
 413 (* Record references to external C functions (for MacOSX) *)
 414 let external_symbols_direct = ref StringSet.empty
 415 let external_symbols_indirect = ref StringSet.empty
 416
 417 let emit_instr fallthrough i =
 418     match i.desc with
 419       Lend -> ()
 420     | Lop(Imove | Ispill | Ireload) ->
 421         let src = i.arg.(0) and dst = i.res.(0) in
 422         if src.loc <> dst.loc then begin
 423           if src.typ = Float then
 424             if is_tos src then
 425               ` fstpl   {emit_reg dst}\n`
 426             else if is_tos dst then
 427               ` fldl    {emit_reg src}\n`
 428             else begin
 429               ` fldl    {emit_reg src}\n`;
 430               ` fstpl   {emit_reg dst}\n`
 431             end
 432           else
 433               ` movl    {emit_reg src}, {emit_reg dst}\n`
 434         end
 435     | Lop(Iconst_int n) ->
 436         if n = 0n then begin
 437           match i.res.(0).loc with
 438             Reg n -> `  xorl    {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
 439           | _     -> `  movl    $0, {emit_reg i.res.(0)}\n`
 440         end else
 441           `     movl    ${emit_nativeint n}, {emit_reg i.res.(0)}\n`
 442     | Lop(Iconst_float s) ->
 443         begin match Int64.bits_of_float (float_of_string s) with
 444         | 0x0000_0000_0000_0000L ->       (* +0.0 *)
 445           `     fldz\n`
 446         | 0x8000_0000_0000_0000L ->       (* -0.0 *)
 447           `     fldz\n  fchs\n`
 448         | 0x3FF0_0000_0000_0000L ->       (*  1.0 *)
 449           `     fld1\n`
 450         | 0xBFF0_0000_0000_0000L ->       (* -1.0 *)
 451           `     fld1\n  fchs\n`
 452         | _ ->
 453           let lbl = new_label() in
 454           float_constants := (lbl, s) :: !float_constants;
 455           `     fldl    {emit_label lbl}\n`
 456         end
 457     | Lop(Iconst_symbol s) ->
 458         `       movl    ${emit_symbol s}, {emit_reg i.res.(0)}\n`
 459     | Lop(Icall_ind) ->
 460         `       call    *{emit_reg i.arg.(0)}\n`;
 461         record_frame i.live i.dbg
 462     | Lop(Icall_imm s) ->
 463         `       call    {emit_symbol s}\n`;
 464         record_frame i.live i.dbg
 465     | Lop(Itailcall_ind) ->
 466         output_epilogue();
 467         `       jmp     *{emit_reg i.arg.(0)}\n`
 468     | Lop(Itailcall_imm s) ->
 469         if s = !function_name then
 470           `     jmp     {emit_label !tailrec_entry_point}\n`
 471         else begin
 472           output_epilogue();
 473           `     jmp     {emit_symbol s}\n`
 474         end
 475     | Lop(Iextcall(s, alloc)) ->
 476         if alloc then begin
 477           if not macosx then
 478             `   movl    ${emit_symbol s}, %eax\n`
 479           else begin
 480             external_symbols_indirect :=
 481               StringSet.add s !external_symbols_indirect;
 482             `   movl    L{emit_symbol s}$non_lazy_ptr, %eax\n`
 483           end;
 484           `     call    {emit_symbol "caml_c_call"}\n`;
 485           record_frame i.live i.dbg
 486         end else begin
 487           if not macosx then
 488             `   call    {emit_symbol s}\n`
 489           else begin
 490             external_symbols_direct :=
 491               StringSet.add s !external_symbols_direct;
 492             `   call    L{emit_symbol s}$stub\n`
 493           end
 494         end
 495     | Lop(Istackoffset n) ->
 496         if n < 0
 497         then `  addl    ${emit_int(-n)}, %esp\n`
 498         else `  subl    ${emit_int(n)}, %esp\n`;
 499         stack_offset := !stack_offset + n
 500     | Lop(Iload(chunk, addr)) ->
 501         let dest = i.res.(0) in
 502         begin match chunk with
 503           | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
 504               ` movl    {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 505           | Byte_unsigned ->
 506               ` movzbl  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 507           | Byte_signed ->
 508               ` movsbl  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 509           | Sixteen_unsigned ->
 510               ` movzwl  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 511           | Sixteen_signed ->
 512               ` movswl  {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
 513           | Single ->
 514             `   flds    {emit_addressing addr i.arg 0}\n`
 515           | Double | Double_u ->
 516             `   fldl    {emit_addressing addr i.arg 0}\n`
 517         end
 518     | Lop(Istore(chunk, addr)) ->
 519         begin match chunk with
 520           | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
 521             `   movl    {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
 522           | Byte_unsigned | Byte_signed ->
 523             `   movb    {emit_reg8 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
 524           | Sixteen_unsigned | Sixteen_signed ->
 525             `   movw    {emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
 526           | Single ->
 527               if is_tos i.arg.(0) then
 528                 `       fstps   {emit_addressing addr i.arg 1}\n`
 529               else begin
 530                 `       fldl    {emit_reg i.arg.(0)}\n`;
 531                 `       fstps   {emit_addressing addr i.arg 1}\n`
 532               end
 533           | Double | Double_u ->
 534               if is_tos i.arg.(0) then
 535                 `       fstpl   {emit_addressing addr i.arg 1}\n`
 536               else begin
 537                 `       fldl    {emit_reg i.arg.(0)}\n`;
 538                 `       fstpl   {emit_addressing addr i.arg 1}\n`
 539               end
 540         end
 541     | Lop(Ialloc n) ->
 542         if !fastcode_flag then begin
 543           let lbl_redo = new_label() in
 544           `{emit_label lbl_redo}:       movl    {emit_symbol "caml_young_ptr"}, %eax\n`;
 545           `     subl    ${emit_int n}, %eax\n`;
 546           `     movl    %eax, {emit_symbol "caml_young_ptr"}\n`;
 547           `     cmpl    {emit_symbol "caml_young_limit"}, %eax\n`;
 548           let lbl_call_gc = new_label() in
 549           let lbl_frame = record_frame_label i.live Debuginfo.none in
 550           `     jb      {emit_label lbl_call_gc}\n`;
 551           `     leal    4(%eax), {emit_reg i.res.(0)}\n`;
 552           call_gc_sites :=
 553             { gc_lbl = lbl_call_gc;
 554               gc_return_lbl = lbl_redo;
 555               gc_frame = lbl_frame } :: !call_gc_sites
 556         end else begin
 557           begin match n with
 558             8  -> `     call    {emit_symbol "caml_alloc1"}\n`
 559           | 12 -> `     call    {emit_symbol "caml_alloc2"}\n`
 560           | 16 -> `     call    {emit_symbol "caml_alloc3"}\n`
 561           | _  -> `     movl    ${emit_int n}, %eax\n`;
 562                   `     call    {emit_symbol "caml_allocN"}\n`
 563           end;
 564           `{record_frame i.live Debuginfo.none} leal    4(%eax), {emit_reg i.res.(0)}\n`
 565         end
 566     | Lop(Iintop(Icomp cmp)) ->
 567         `       cmpl    {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
 568         let b = name_for_cond_branch cmp in
 569         `       set{emit_string b}      %al\n`;
 570         `       movzbl  %al, {emit_reg i.res.(0)}\n`
 571     | Lop(Iintop_imm(Icomp cmp, n)) ->
 572         `       cmpl    ${emit_int n}, {emit_reg i.arg.(0)}\n`;
 573         let b = name_for_cond_branch cmp in
 574         `       set{emit_string b}      %al\n`;
 575         `       movzbl  %al, {emit_reg i.res.(0)}\n`
 576     | Lop(Iintop Icheckbound) ->
 577         let lbl = bound_error_label i.dbg in
 578         `       cmpl    {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
 579         `       jbe     {emit_label lbl}\n`
 580     | Lop(Iintop_imm(Icheckbound, n)) ->
 581         let lbl = bound_error_label i.dbg in
 582         `       cmpl    ${emit_int n}, {emit_reg i.arg.(0)}\n`;
 583         `       jbe     {emit_label lbl}\n`
 584     | Lop(Iintop(Idiv | Imod)) ->
 585         `       cltd\n`;
 586         `       idivl   {emit_reg i.arg.(1)}\n`
 587     | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
 588         (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %ecx *)
 589         `       {emit_string(instr_for_intop op)}       %cl, {emit_reg i.res.(0)}\n`
 590     | Lop(Iintop op) ->
 591         (* We have i.arg.(0) = i.res.(0) *)
 592         `       {emit_string(instr_for_intop op)}       {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
 593     | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
 594         `       leal    {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
 595     | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
 596         `       incl    {emit_reg i.res.(0)}\n`
 597     | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
 598         `       decl    {emit_reg i.res.(0)}\n`
 599     | Lop(Iintop_imm(Idiv, n)) ->
 600         let l = Misc.log2 n in
 601         let lbl = new_label() in
 602         output_test_zero i.arg.(0);
 603         `       jge     {emit_label lbl}\n`;
 604         `       addl    ${emit_int(n-1)}, {emit_reg i.arg.(0)}\n`;
 605         `{emit_label lbl}:      sarl    ${emit_int l}, {emit_reg i.arg.(0)}\n`
 606     | Lop(Iintop_imm(Imod, n)) ->
 607         let lbl = new_label() in
 608         `       movl    {emit_reg i.arg.(0)}, %eax\n`;
 609         `       testl   %eax, %eax\n`;
 610         `       jge     {emit_label lbl}\n`;
 611         `       addl    ${emit_int(n-1)}, %eax\n`;
 612         `{emit_label lbl}:      andl    ${emit_int(-n)}, %eax\n`;
 613         `       subl    %eax, {emit_reg i.arg.(0)}\n`
 614     | Lop(Iintop_imm(op, n)) ->
 615         (* We have i.arg.(0) = i.res.(0) *)
 616         `       {emit_string(instr_for_intop op)}       ${emit_int n}, {emit_reg i.res.(0)}\n`
 617     | Lop(Inegf | Iabsf as floatop) ->
 618         if not (is_tos i.arg.(0)) then
 619           `     fldl    {emit_reg i.arg.(0)}\n`;
 620         `       {emit_string(instr_for_floatop floatop)}\n`
 621     | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
 622           as floatop) ->
 623         begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
 624           (true, true) ->
 625           (* both operands on top of FP stack *)
 626           `     {emit_string(instr_for_floatop_pop floatop)}    %st, %st(1)\n`
 627         | (true, false) ->
 628           (* first operand on stack *)
 629           `     {emit_string(instr_for_floatop floatop)}        {emit_reg i.arg.(1)}\n`
 630         | (false, true) ->
 631           (* second operand on stack *)
 632           `     {emit_string(instr_for_floatop_reversed floatop)}       {emit_reg i.arg.(0)}\n`
 633         | (false, false) ->
 634           (* both operands in memory *)
 635           `     fldl    {emit_reg i.arg.(0)}\n`;
 636           `     {emit_string(instr_for_floatop floatop)}        {emit_reg i.arg.(1)}\n`
 637         end
 638     | Lop(Ifloatofint) ->
 639         begin match i.arg.(0).loc with
 640           Stack s ->
 641             `   fildl   {emit_reg i.arg.(0)}\n`
 642         | _ ->
 643             `   pushl   {emit_reg i.arg.(0)}\n`;
 644             `   fildl   (%esp)\n`;
 645             `   addl    $4, %esp\n`
 646         end
 647     | Lop(Iintoffloat) ->
 648         if not (is_tos i.arg.(0)) then
 649           `     fldl    {emit_reg i.arg.(0)}\n`;
 650         stack_offset := !stack_offset - 8;
 651         `       subl    $8, %esp\n`;
 652         `       fnstcw  4(%esp)\n`;
 653         `       movw    4(%esp), %ax\n`;
 654         `       movb    $12, %ah\n`;
 655         `       movw    %ax, 0(%esp)\n`;
 656         `       fldcw   0(%esp)\n`;
 657         begin match i.res.(0).loc with
 658           Stack s ->
 659             `   fistpl  {emit_reg i.res.(0)}\n`
 660         | _ ->
 661             `   fistpl  (%esp)\n`;
 662             `   movl    (%esp), {emit_reg i.res.(0)}\n`
 663         end;
 664         `       fldcw   4(%esp)\n`;
 665         `       addl    $8, %esp\n`;
 666         stack_offset := !stack_offset + 8
 667     | Lop(Ispecific(Ilea addr)) ->
 668         `       lea     {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
 669     | Lop(Ispecific(Istore_int(n, addr))) ->
 670         `       movl    ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
 671     | Lop(Ispecific(Istore_symbol(s, addr))) ->
 672         `       movl    ${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
 673     | Lop(Ispecific(Ioffset_loc(n, addr))) ->
 674         `       addl    ${emit_int n}, {emit_addressing addr i.arg 0}\n`
 675     | Lop(Ispecific(Ipush)) ->
 676         (* Push arguments in reverse order *)
 677         for n = Array.length i.arg - 1 downto 0 do
 678           let r = i.arg.(n) in
 679           match r with
 680             {loc = Reg _; typ = Float} ->
 681               ` subl    $8, %esp\n`;
 682               ` fstpl   0(%esp)\n`;
 683               stack_offset := !stack_offset + 8
 684           | {loc = Stack sl; typ = Float} ->
 685               let ofs = slot_offset sl 1 in
 686               ` pushl   {emit_int(ofs + 4)}(%esp)\n`;
 687               ` pushl   {emit_int(ofs + 4)}(%esp)\n`;
 688               stack_offset := !stack_offset + 8
 689           | _ ->
 690               ` pushl   {emit_reg r}\n`;
 691               stack_offset := !stack_offset + 4
 692         done
 693     | Lop(Ispecific(Ipush_int n)) ->
 694         `       pushl   ${emit_nativeint n}\n`;
 695         stack_offset := !stack_offset + 4
 696     | Lop(Ispecific(Ipush_symbol s)) ->
 697         `       pushl   ${emit_symbol s}\n`;
 698         stack_offset := !stack_offset + 4
 699     | Lop(Ispecific(Ipush_load addr)) ->
 700         `       pushl   {emit_addressing addr i.arg 0}\n`;
 701         stack_offset := !stack_offset + 4
 702     | Lop(Ispecific(Ipush_load_float addr)) ->
 703         `       pushl   {emit_addressing (offset_addressing addr 4) i.arg 0}\n`;
 704         `       pushl   {emit_addressing addr i.arg 0}\n`;
 705         stack_offset := !stack_offset + 8
 706     | Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
 707         if not (is_tos i.arg.(0)) then
 708           `     fldl    {emit_reg i.arg.(0)}\n`;
 709         `       {emit_string(instr_for_floatarithmem double op)}        {emit_addressing addr i.arg 1}\n`
 710     | Lop(Ispecific(Ifloatspecial s)) ->
 711         (* Push args on float stack if necessary *)
 712         for k = 0 to Array.length i.arg - 1 do
 713           if not (is_tos i.arg.(k)) then `      fldl    {emit_reg i.arg.(k)}\n`
 714         done;
 715         (* Fix-up for binary instrs whose args were swapped *)
 716         if Array.length i.arg = 2 && is_tos i.arg.(1) then
 717           `     fxch    %st(1)\n`;
 718         emit_floatspecial s
 719     | Lreloadretaddr ->
 720         ()
 721     | Lreturn ->
 722         output_epilogue();
 723         `       ret\n`
 724     | Llabel lbl ->
 725         `{emit_Llabel fallthrough lbl}:\n`
 726     | Lbranch lbl ->
 727         `       jmp     {emit_label lbl}\n`
 728     | Lcondbranch(tst, lbl) ->
 729         begin match tst with
 730           Itruetest ->
 731             output_test_zero i.arg.(0);
 732             `   jne     {emit_label lbl}\n`
 733         | Ifalsetest ->
 734             output_test_zero i.arg.(0);
 735             `   je      {emit_label lbl}\n`
 736         | Iinttest cmp ->
 737             `   cmpl    {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
 738             let b = name_for_cond_branch cmp in
 739             `   j{emit_string b}        {emit_label lbl}\n`
 740         | Iinttest_imm((Isigned Ceq | Isigned Cne |
 741                         Iunsigned Ceq | Iunsigned Cne) as cmp, 0) ->
 742             output_test_zero i.arg.(0);
 743             let b = name_for_cond_branch cmp in
 744             `   j{emit_string b}        {emit_label lbl}\n`
 745         | Iinttest_imm(cmp, n) ->
 746             `   cmpl    ${emit_int n}, {emit_reg i.arg.(0)}\n`;
 747             let b = name_for_cond_branch cmp in
 748             `   j{emit_string b}        {emit_label lbl}\n`
 749         | Ifloattest(cmp, neg) ->
 750             emit_float_test cmp neg i.arg lbl
 751         | Ioddtest ->
 752             `   testl   $1, {emit_reg i.arg.(0)}\n`;
 753             `   jne     {emit_label lbl}\n`
 754         | Ieventest ->
 755             `   testl   $1, {emit_reg i.arg.(0)}\n`;
 756             `   je      {emit_label lbl}\n`
 757         end
 758     | Lcondbranch3(lbl0, lbl1, lbl2) ->
 759             `   cmpl    $1, {emit_reg i.arg.(0)}\n`;
 760             begin match lbl0 with
 761               None -> ()
 762             | Some lbl -> `     jb      {emit_label lbl}\n`
 763             end;
 764             begin match lbl1 with
 765               None -> ()
 766             | Some lbl -> `     je      {emit_label lbl}\n`
 767             end;
 768             begin match lbl2 with
 769               None -> ()
 770             | Some lbl -> `     jg      {emit_label lbl}\n`
 771             end
 772     | Lswitch jumptbl ->
 773         let lbl = new_label() in
 774         `       jmp     *{emit_label lbl}(, {emit_reg i.arg.(0)}, 4)\n`;
 775         `       .data\n`;
 776         `{emit_label lbl}:`;
 777         for i = 0 to Array.length jumptbl - 1 do
 778           `     .long   {emit_label jumptbl.(i)}\n`
 779         done;
 780         `       .text\n`
 781     | Lsetuptrap lbl ->
 782         `       call    {emit_label lbl}\n`
 783     | Lpushtrap ->
 784         if trap_frame_size > 8 then
 785           `     subl    ${emit_int (trap_frame_size - 8)}, %esp\n`;
 786         `       pushl   {emit_symbol "caml_exception_pointer"}\n`;
 787         `       movl    %esp, {emit_symbol "caml_exception_pointer"}\n`;
 788         stack_offset := !stack_offset + trap_frame_size
 789     | Lpoptrap ->
 790         `       popl    {emit_symbol "caml_exception_pointer"}\n`;
 791         `       addl    ${emit_int (trap_frame_size - 4)}, %esp\n`;
 792         stack_offset := !stack_offset - trap_frame_size
 793     | Lraise ->
 794         if !Clflags.debug then begin
 795           `     call    {emit_symbol "caml_raise_exn"}\n`;
 796           record_frame Reg.Set.empty i.dbg
 797         end else begin
 798           `     movl    {emit_symbol "caml_exception_pointer"}, %esp\n`;
 799           `     popl    {emit_symbol "caml_exception_pointer"}\n`;
 800           if trap_frame_size > 8 then
 801             `   addl    ${emit_int (trap_frame_size - 8)}, %esp\n`;
 802           `     ret\n`
 803         end
 804
 805 let rec emit_all fallthrough i =
 806   match i.desc with
 807   |  Lend -> ()
 808   | _ ->
 809       emit_instr fallthrough i;
 810       emit_all
 811         (Linearize.has_fallthrough  i.desc)
 812         i.next
 813
 814 (* Emission of the floating-point constants *)
 815
 816 let emit_float_constant (lbl, cst) =
 817   `     .data\n`;
 818   `{emit_label lbl}:`;
 819   emit_float64_split_directive ".long" cst
 820
 821 (* Emission of external symbol references (for MacOSX) *)
 822
 823 let emit_external_symbol_direct s =
 824   `L{emit_symbol s}$stub:\n`;
 825   `     .indirect_symbol {emit_symbol s}\n`;
 826   `     hlt ; hlt ; hlt ; hlt ; hlt\n`
 827
 828 let emit_external_symbol_indirect s =
 829   `L{emit_symbol s}$non_lazy_ptr:\n`;
 830   `     .indirect_symbol {emit_symbol s}\n`;
 831   `     .long   0\n`
 832
 833 let emit_external_symbols () =
 834   `     .section __IMPORT,__pointers,non_lazy_symbol_pointers\n`;
 835   StringSet.iter emit_external_symbol_indirect !external_symbols_indirect;
 836   external_symbols_indirect := StringSet.empty;
 837   `     .section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5\n`;
 838   StringSet.iter emit_external_symbol_direct !external_symbols_direct;
 839   external_symbols_direct := StringSet.empty;
 840   if !Clflags.gprofile then begin
 841     `Lmcount$stub:\n`;
 842     `   .indirect_symbol mcount\n`;
 843     `   hlt ; hlt ; hlt ; hlt ; hlt\n`
 844   end
 845
 846 (* Emission of the profiling prelude *)
 847
 848 let emit_profile () =
 849   match Config.system with
 850     "linux_elf" | "gnu" ->
 851       ` pushl   %eax\n`;
 852       ` movl    %esp, %ebp\n`;
 853       ` pushl   %ecx\n`;
 854       ` pushl   %edx\n`;
 855       ` call    {emit_symbol "mcount"}\n`;
 856       ` popl    %edx\n`;
 857       ` popl    %ecx\n`;
 858       ` popl    %eax\n`
 859   | "bsd_elf" ->
 860       ` pushl   %eax\n`;
 861       ` movl    %esp, %ebp\n`;
 862       ` pushl   %ecx\n`;
 863       ` pushl   %edx\n`;
 864       ` call    .mcount\n`;
 865       ` popl    %edx\n`;
 866       ` popl    %ecx\n`;
 867       ` popl    %eax\n`
 868   | "macosx" ->
 869       ` pushl   %eax\n`;
 870       ` movl    %esp, %ebp\n`;
 871       ` pushl   %ecx\n`;
 872       ` pushl   %edx\n`;
 873       ` call    Lmcount$stub\n`;
 874       ` popl    %edx\n`;
 875       ` popl    %ecx\n`;
 876       ` popl    %eax\n`
 877   | _ -> () (*unsupported yet*)
 878
 879 (* Emission of a function declaration *)
 880
 881 let fundecl fundecl =
 882   function_name := fundecl.fun_name;
 883   fastcode_flag := fundecl.fun_fast;
 884   tailrec_entry_point := new_label();
 885   stack_offset := 0;
 886   float_constants := [];
 887   call_gc_sites := [];
 888   bound_error_sites := [];
 889   bound_error_call := 0;
 890   `     .text\n`;
 891   emit_align 16;
 892   if macosx
 893   && not !Clflags.output_c_object
 894   && is_generic_function fundecl.fun_name
 895   then (* PR#4690 *)
 896     `   .private_extern {emit_symbol fundecl.fun_name}\n`
 897   else
 898     `   .globl  {emit_symbol fundecl.fun_name}\n`;
 899   `{emit_symbol fundecl.fun_name}:\n`;
 900   if !Clflags.gprofile then emit_profile();
 901   let n = frame_size() - 4 in
 902   if n > 0 then
 903     `   subl    ${emit_int n}, %esp\n`;
 904   `{emit_label !tailrec_entry_point}:\n`;
 905   emit_all true fundecl.fun_body;
 906   List.iter emit_call_gc !call_gc_sites;
 907   emit_call_bound_errors ();
 908   List.iter emit_float_constant !float_constants;
 909   match Config.system with
 910     "linux_elf" | "bsd_elf" | "gnu" ->
 911       ` .type   {emit_symbol fundecl.fun_name},@function\n`;
 912       ` .size   {emit_symbol fundecl.fun_name},.-{emit_symbol fundecl.fun_name}\n`
 913   | _ -> ()
 914
 915
 916 (* Emission of data *)
 917
 918 let emit_item = function
 919     Cglobal_symbol s ->
 920       ` .globl  {emit_symbol s}\n`;
 921   | Cdefine_symbol s ->
 922       `{emit_symbol s}:\n`
 923   | Cdefine_label lbl ->
 924       `{emit_label (100000 + lbl)}:\n`
 925   | Cint8 n ->
 926       ` .byte   {emit_int n}\n`
 927   | Cint16 n ->
 928       ` {emit_string word_dir}  {emit_int n}\n`
 929   | Cint32 n ->
 930       ` .long   {emit_nativeint n}\n`
 931   | Cint n ->
 932       ` .long   {emit_nativeint n}\n`
 933   | Csingle f ->
 934       emit_float32_directive ".long" f
 935   | Cdouble f ->
 936       emit_float64_split_directive ".long" f
 937   | Csymbol_address s ->
 938       ` .long   {emit_symbol s}\n`
 939   | Clabel_address lbl ->
 940       ` .long   {emit_label (100000 + lbl)}\n`
 941   | Cstring s ->
 942       if use_ascii_dir
 943       then emit_string_directive "      .ascii  " s
 944       else emit_bytes_directive  "      .byte   " s
 945   | Cskip n ->
 946       if n > 0 then `   {emit_string skip_dir}  {emit_int n}\n`
 947   | Calign n ->
 948       emit_align n
 949
 950 let data l =
 951   `     .data\n`;
 952   List.iter emit_item l
 953
 954 (* Beginning / end of an assembly file *)
 955
 956 let begin_assembly() =
 957   let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
 958   `     .data\n`;
 959   `     .globl  {emit_symbol lbl_begin}\n`;
 960   `{emit_symbol lbl_begin}:\n`;
 961   let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
 962   `     .text\n`;
 963   `     .globl  {emit_symbol lbl_begin}\n`;
 964   `{emit_symbol lbl_begin}:\n`;
 965   if macosx then `      nop\n` (* PR#4690 *)
 966
 967 let end_assembly() =
 968   let lbl_end = Compilenv.make_symbol (Some "code_end") in
 969   `     .text\n`;
 970   if macosx then `      nop\n`; (* suppress "ld warning: atom sorting error" *)
 971   `     .globl  {emit_symbol lbl_end}\n`;
 972   `{emit_symbol lbl_end}:\n`;
 973   `     .data\n`;
 974   let lbl_end = Compilenv.make_symbol (Some "data_end") in
 975   `     .globl  {emit_symbol lbl_end}\n`;
 976   `{emit_symbol lbl_end}:\n`;
 977   `     .long   0\n`;
 978   let lbl = Compilenv.make_symbol (Some "frametable") in
 979   `     .globl  {emit_symbol lbl}\n`;
 980   `{emit_symbol lbl}:\n`;
 981   emit_frames
 982     { efa_label = (fun l -> `   .long   {emit_label l}\n`);
 983       efa_16 = (fun n -> `      {emit_string word_dir}  {emit_int n}\n`);
 984       efa_32 = (fun n -> `      .long   {emit_int32 n}\n`);
 985       efa_word = (fun n -> `    .long   {emit_int n}\n`);
 986       efa_align = emit_align;
 987       efa_label_rel = (fun lbl ofs ->
 988                            `    .long   {emit_label lbl} - . + {emit_int32 ofs}\n`);
 989       efa_def_label = (fun l -> `{emit_label l}:\n`);
 990       efa_string = (fun s ->
 991         let s = s ^ "\000" in
 992         if use_ascii_dir
 993         then emit_string_directive "    .ascii  " s
 994         else emit_bytes_directive  "    .byte   " s) };
 995   if macosx then emit_external_symbols ();
 996   if Config.system = "linux_elf" then
 997     (* Mark stack as non-executable, PR#4564 *)
 998     `\n .section .note.GNU-stack,\"\",%progbits\n`