1 (***********************************************************************)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7 (* Copyright 2000 Institut National de Recherche en Informatique et *)
8 (* en Automatique. All rights reserved. This file is distributed *)
9 (* under the terms of the Q Public License version 1.0. *)
11 (***********************************************************************)
13 (* $Id: selection.ml 6276 2004-05-03 12:27:07Z xleroy $ *)
15 (* Instruction selection for the IA64 processor *)
23 (* Helper function for add selection *)
25 let reassociate_add = function
26 [Cconst_int n; arg] ->
28 | [Cop(Caddi, [arg1; Cconst_int n]); arg3] ->
29 [Cop(Caddi, [arg1; arg3]); Cconst_int n]
30 | [Cop(Caddi, [Cconst_int n; arg1]); arg3] ->
31 [Cop(Caddi, [arg1; arg3]); Cconst_int n]
32 | [arg1; Cop(Caddi, [Cconst_int n; arg3])] ->
33 [Cop(Caddi, [arg1; arg3]); Cconst_int n]
34 | [arg1; Cop(Caddi, [arg2; arg3])] ->
35 [Cop(Caddi, [arg1; arg2]); arg3]
38 (* Helper function for mult-immediate selection *)
40 let rec count_one_bits n =
42 else if n land 1 = 0 then count_one_bits (n lsr 1)
43 else 1 + count_one_bits (n lsr 1)
45 class selector = object (self)
47 inherit Selectgen.selector_generic as super
49 (* Range of immediate arguments:
52 sub reversed 8-bit signed
53 mul at most 16 "one" bits
55 and, or, xor 8-bit signed
56 lsl, lsr, asr 6-bit unsigned
58 For is_immediate, we put 8-bit signed and treat adds specially
59 (selectgen already does the right thing for shifts) *)
61 method is_immediate n = n >= -128 && n < 128
63 method is_immediate_add n = n >= -8192 && n < 8192
65 method select_addressing arg = (Iindexed, arg)
67 method select_operation op args =
69 match op with Cadda -> Caddi | Csuba -> Csubi | _ -> op in
71 match norm_op with Caddi -> reassociate_add args | _ -> args in
72 match (norm_op, norm_args) with
73 (* Recognize x + y + 1 and x - y - 1 *)
74 | (Caddi, [Cop(Caddi, [arg1; arg2]); Cconst_int 1]) ->
75 (Ispecific Iadd1, [arg1; arg2])
76 | (Caddi, [Cop(Clsl, [arg1; Cconst_int 1]); Cconst_int 1]) ->
77 (Ispecific Iadd1, [arg1])
78 | (Csubi, [Cop(Csubi, [arg1; arg2]); Cconst_int 1]) ->
79 (Ispecific Isub1, [arg1; arg2])
80 | (Csubi, [Cop(Csubi, [arg1; Cconst_int 1]); arg2]) ->
81 (Ispecific Isub1, [arg1; arg2])
82 (* Recognize add immediate *)
83 | (Caddi, [arg; Cconst_int n]) when self#is_immediate_add n ->
84 (Iintop_imm(Iadd, n), [arg])
85 (* Turn sub immediate into add immediate *)
86 | (Csubi, [arg; Cconst_int n]) when self#is_immediate_add (-n) ->
87 (Iintop_imm(Iadd, -n), [arg])
88 (* Recognize imm - arg *)
89 | (Csubi, [Cconst_int n; arg]) when self#is_immediate n ->
90 (Iintop_imm(Isub, n), [arg])
91 (* Recognize shift-add operations *)
92 | (Caddi, [arg2; Cop(Clsl, [arg1; Cconst_int(1|2|3|4 as shift)])]) ->
93 (Ispecific(Ishladd shift), [arg1; arg2])
94 | (Caddi, [Cop(Clsl, [arg1; Cconst_int(1|2|3|4 as shift)]); arg2]) ->
95 (Ispecific(Ishladd shift), [arg1; arg2])
96 (* Recognize truncation/normalization of 64-bit integers to 32 bits *)
97 | (Casr, [Cop(Clsl, [arg; Cconst_int 32]); Cconst_int 32]) ->
98 (Ispecific (Isignextend 4), [arg])
99 (* Recognize x * cst and cst * x *)
100 | (Cmuli, [arg; Cconst_int n]) ->
101 self#select_imul_imm arg n
102 | (Cmuli, [Cconst_int n; arg]) ->
103 self#select_imul_imm arg n
104 (* Prevent the recognition of (x / cst) and (x % cst) when cst is not
105 a power of 2, which do not correspond to an instruction.
106 Turn general division and modulus into calls to C library functions *)
107 | (Cdivi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) ->
108 (Iintop_imm(Idiv, n), [arg])
110 (Iextcall("__divdi3", false), args)
111 | (Cmodi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) && n <> 1 ->
112 (Iintop_imm(Imod, n), [arg])
114 (Iextcall("__moddi3", false), args)
115 (* Recognize mult-add and mult-sub instructions *)
116 | (Caddf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
117 (Ispecific Imultaddf, [arg1; arg2; arg3])
118 | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2])]) ->
119 (Ispecific Imultaddf, [arg1; arg2; arg3])
120 | (Csubf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
121 (Ispecific Imultsubf, [arg1; arg2; arg3])
122 | (Csubf, [arg3; Cop(Cmulf, [arg1; arg2])]) ->
123 (Ispecific Isubmultf, [arg1; arg2; arg3])
124 (* Use default selector otherwise *)
126 super#select_operation op args
128 method private select_imul_imm arg n =
129 if count_one_bits n <= 16
130 then (Iintop_imm(Imul, n), [arg])
131 else (Iintop Imul, [arg; Cconst_int n])
133 (* To palliate the lack of addressing with displacement, multiple
134 stores to the address r are translated as follows
135 (t1 and t2 are two temp regs)
138 compute data1 in reg1
139 compute data2 in reg2
140 store reg1 at t1 and increment t1 by 16
141 store reg2 at t2 and increment t2 by 16
142 compute data3 in reg3
143 compute data4 in reg4
144 store reg3 at t1 and increment t1 by 16
145 store reg4 at t2 and increment t2 by 16
147 Note: we use two temp regs and perform stores by groups of 2
148 in order to expose more instruction-level parallelism. *)
149 method emit_stores env data regs_addr =
150 let t1 = Reg.create Addr and t2 = Reg.create Addr in
151 self#insert (Iop(Iintop_imm(Iadd, -8))) regs_addr [|t1|];
152 self#insert (Iop Imove) regs_addr [|t2|];
153 (* Store components by batch of 2 *)
154 let backlog = ref None in
157 None -> (* keep it for later *)
159 | Some r' -> (* store r' at t1 and r at t2 *)
160 self#insert (Iop(Ispecific(Istoreincr 16))) [| t1; r' |] [| t1 |];
161 self#insert (Iop(Ispecific(Istoreincr 16))) [| t2; r |] [| t2 |];
165 match self#emit_expr env exp with
167 | Some regs -> Array.iter do_store regs)
169 (* Store the backlog if any *)
170 begin match !backlog with
172 | Some r -> self#insert (Iop(Ispecific(Istoreincr 16))) [| t1; r |] [| t1 |]
174 (* Insert an init barrier *)
175 self#insert (Iop(Ispecific Iinitbarrier)) [||] [||]
178 let fundecl f = (new selector)#emit_fundecl f