l4/pkg/ocaml/contrib/asmcomp/ia64/selection.ml

   1 (***********************************************************************)
   2 (*                                                                     *)
   3 (*                           Objective Caml                            *)
   4 (*                                                                     *)
   5 (*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
   6 (*                                                                     *)
   7 (*  Copyright 2000 Institut National de Recherche en Informatique et   *)
   8 (*  en Automatique.  All rights reserved.  This file is distributed    *)
   9 (*  under the terms of the Q Public License version 1.0.               *)
  10 (*                                                                     *)
  11 (***********************************************************************)
  12
  13 (* $Id: selection.ml 6276 2004-05-03 12:27:07Z xleroy $ *)
  14
  15 (* Instruction selection for the IA64 processor *)
  16
  17 open Misc
  18 open Cmm
  19 open Reg
  20 open Arch
  21 open Mach
  22
  23 (* Helper function for add selection *)
  24
  25 let reassociate_add = function
  26     [Cconst_int n; arg] ->
  27         [arg; Cconst_int n]
  28   | [Cop(Caddi, [arg1; Cconst_int n]); arg3] ->
  29         [Cop(Caddi, [arg1; arg3]); Cconst_int n]
  30   | [Cop(Caddi, [Cconst_int n; arg1]); arg3] ->
  31         [Cop(Caddi, [arg1; arg3]); Cconst_int n]
  32   | [arg1; Cop(Caddi, [Cconst_int n; arg3])] ->
  33         [Cop(Caddi, [arg1; arg3]); Cconst_int n]
  34   | [arg1; Cop(Caddi, [arg2; arg3])] ->
  35         [Cop(Caddi, [arg1; arg2]); arg3]
  36   | args -> args
  37
  38 (* Helper function for mult-immediate selection *)
  39
  40 let rec count_one_bits n =
  41   if n = 0 then 0
  42   else if n land 1 = 0 then count_one_bits (n lsr 1)
  43   else 1 + count_one_bits (n lsr 1)
  44
  45 class selector = object (self)
  46
  47 inherit Selectgen.selector_generic as super
  48
  49 (* Range of immediate arguments:
  50      add                14-bit signed
  51      sub                turned into add
  52      sub reversed       8-bit signed
  53      mul                at most 16 "one" bits
  54      div, mod           powers of 2
  55      and, or, xor       8-bit signed
  56      lsl, lsr, asr      6-bit unsigned
  57      cmp                8-bit signed
  58    For is_immediate, we put 8-bit signed and treat adds specially
  59    (selectgen already does the right thing for shifts) *)
  60
  61 method is_immediate n = n >= -128 && n < 128
  62
  63 method is_immediate_add n = n >= -8192 && n < 8192
  64
  65 method select_addressing arg = (Iindexed, arg)
  66
  67 method select_operation op args =
  68   let norm_op =
  69     match op with Cadda -> Caddi | Csuba -> Csubi | _ -> op in
  70   let norm_args =
  71     match norm_op with Caddi -> reassociate_add args | _ -> args in
  72   match (norm_op, norm_args) with
  73   (* Recognize x + y + 1 and x - y - 1 *)
  74   | (Caddi, [Cop(Caddi, [arg1; arg2]); Cconst_int 1]) ->
  75       (Ispecific Iadd1, [arg1; arg2])
  76   | (Caddi, [Cop(Clsl, [arg1; Cconst_int 1]); Cconst_int 1]) ->
  77       (Ispecific Iadd1, [arg1])
  78   | (Csubi, [Cop(Csubi, [arg1; arg2]); Cconst_int 1]) ->
  79       (Ispecific Isub1, [arg1; arg2])
  80   | (Csubi, [Cop(Csubi, [arg1; Cconst_int 1]); arg2]) ->
  81       (Ispecific Isub1, [arg1; arg2])
  82   (* Recognize add immediate *)
  83   | (Caddi, [arg; Cconst_int n]) when self#is_immediate_add n ->
  84       (Iintop_imm(Iadd, n), [arg])
  85   (* Turn sub immediate into add immediate *)
  86   | (Csubi, [arg; Cconst_int n]) when self#is_immediate_add (-n) ->
  87       (Iintop_imm(Iadd, -n), [arg])
  88   (* Recognize imm - arg *)
  89   | (Csubi, [Cconst_int n; arg]) when self#is_immediate n ->
  90       (Iintop_imm(Isub, n), [arg])
  91   (* Recognize shift-add operations *)
  92   | (Caddi, [arg2; Cop(Clsl, [arg1; Cconst_int(1|2|3|4 as shift)])]) ->
  93       (Ispecific(Ishladd shift), [arg1; arg2])
  94   | (Caddi, [Cop(Clsl, [arg1; Cconst_int(1|2|3|4 as shift)]); arg2]) ->
  95       (Ispecific(Ishladd shift), [arg1; arg2])
  96   (* Recognize truncation/normalization of 64-bit integers to 32 bits *)
  97   | (Casr, [Cop(Clsl, [arg; Cconst_int 32]); Cconst_int 32]) ->
  98       (Ispecific (Isignextend 4), [arg])
  99   (* Recognize x * cst and cst * x *)
 100   | (Cmuli, [arg; Cconst_int n]) ->
 101       self#select_imul_imm arg n
 102   | (Cmuli, [Cconst_int n; arg]) ->
 103       self#select_imul_imm arg n
 104   (* Prevent the recognition of (x / cst) and (x % cst) when cst is not
 105      a power of 2, which do not correspond to an instruction.
 106      Turn general division and modulus into calls to C library functions *)
 107   | (Cdivi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) ->
 108       (Iintop_imm(Idiv, n), [arg])
 109   | (Cdivi, _) ->
 110       (Iextcall("__divdi3", false), args)
 111   | (Cmodi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) && n <> 1 ->
 112       (Iintop_imm(Imod, n), [arg])
 113   | (Cmodi, _) ->
 114       (Iextcall("__moddi3", false), args)
 115   (* Recognize mult-add and mult-sub instructions *)
 116   | (Caddf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
 117       (Ispecific Imultaddf, [arg1; arg2; arg3])
 118   | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2])]) ->
 119       (Ispecific Imultaddf, [arg1; arg2; arg3])
 120   | (Csubf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
 121       (Ispecific Imultsubf, [arg1; arg2; arg3])
 122   | (Csubf, [arg3; Cop(Cmulf, [arg1; arg2])]) ->
 123       (Ispecific Isubmultf, [arg1; arg2; arg3])
 124   (* Use default selector otherwise *)
 125   | _ ->
 126       super#select_operation op args
 127
 128 method private select_imul_imm arg n =
 129   if count_one_bits n <= 16
 130   then (Iintop_imm(Imul, n), [arg])
 131   else (Iintop Imul, [arg; Cconst_int n])
 132
 133 (* To palliate the lack of addressing with displacement, multiple
 134    stores to the address r are translated as follows
 135    (t1 and t2 are two temp regs)
 136       t1 := r - 8
 137       t2 := r
 138       compute data1 in reg1
 139       compute data2 in reg2
 140       store reg1 at t1 and increment t1 by 16
 141       store reg2 at t2 and increment t2 by 16
 142       compute data3 in reg3
 143       compute data4 in reg4
 144       store reg3 at t1 and increment t1 by 16
 145       store reg4 at t2 and increment t2 by 16
 146       ...
 147     Note: we use two temp regs and perform stores by groups of 2
 148     in order to expose more instruction-level parallelism. *)
 149 method emit_stores env data regs_addr =
 150   let t1 = Reg.create Addr and t2 = Reg.create Addr in
 151   self#insert (Iop(Iintop_imm(Iadd, -8))) regs_addr [|t1|];
 152   self#insert (Iop Imove) regs_addr [|t2|];
 153   (* Store components by batch of 2 *)
 154   let backlog = ref None in
 155   let do_store r =
 156     match !backlog with
 157       None -> (* keep it for later *)
 158         backlog := Some r
 159     | Some r' -> (* store r' at t1 and r at t2 *)
 160         self#insert (Iop(Ispecific(Istoreincr 16))) [| t1; r' |] [| t1 |];
 161         self#insert (Iop(Ispecific(Istoreincr 16))) [| t2; r  |] [| t2 |];
 162         backlog := None in
 163   List.iter
 164     (fun exp ->
 165       match self#emit_expr env exp with
 166         None -> assert false
 167       | Some regs -> Array.iter do_store regs)
 168     data;
 169   (* Store the backlog if any *)
 170   begin match !backlog with
 171     None -> ()
 172   | Some r -> self#insert (Iop(Ispecific(Istoreincr 16))) [| t1; r |] [| t1 |]
 173   end;
 174   (* Insert an init barrier *)
 175   self#insert (Iop(Ispecific Iinitbarrier)) [||] [||]
 176 end
 177
 178 let fundecl f = (new selector)#emit_fundecl f