1 --- a/HerculesCompiler/llvm-passes/CMakeLists.txt
2 +++ b/HerculesCompiler/llvm-passes/CMakeLists.txt
4 MESSAGE( STATUS "PROJECTS_MAIN_SRC_DIR = " ${PROJECTS_MAIN_SRC_DIR} )
5 MESSAGE( STATUS "PROJECTS_MAIN_INCLUDE_DIR = " ${PROJECTS_MAIN_INCLUDE_DIR} )
7 -include_directories(${PROJECTS_MAIN_INCLUDE_DIR})
8 +include_directories(${CMAKE_CURRENT_BINARY_DIR}/include ${PROJECTS_MAIN_INCLUDE_DIR})
10 +include(CMakeDependentOption)
12 +# // ----------------------------
13 +# // ---- CORE CONFIGURATION ----
14 +# // ----------------------------
16 +# // Number of bytes available in the SPM or cache
17 +# // SPM (TX1, TX2) = 48000
18 +# // CACHE TX1 CPU = 524288
19 +# // CACHE TX1 GPU = 262144
20 +# // CACHE TX2 GPU = 524288
21 +# // CACHE TX2 CPU = 524288
22 +set(CPU_CACHE_SIZE_DEFAULT 524288 CACHE STRING "CPU cache size")
23 +set(GPU_CACHE_SIZE_DEFAULT 524288 CACHE STRING "GPU cache size")
24 +set(GPU_SCRATCHPAD_SIZE_DEFAULT 48000 CACHE STRING "GPU scratchpad size")
27 +option(USE_HW_CACHES "Use hardware cache prefetches in Specialized function, in place of SPM-based" OFF)
28 +option(HIERARCHICAL_INTERVALS "Use hierarchical PREM intervals for loops" OFF)
30 +# // -----------------------------
31 +# // ---- CACHE CONFIGURATION ----
32 +# // -----------------------------
34 +# // If we use hardware caches, we can either use load or prefetch instructions
35 +# // to bring the data local.
36 +set(PREFETCH_REPS 1 CACHE STRING "")
37 +cmake_dependent_option(USE_HW_CACHES_PREFETCH "Use prefetch instructions"
38 + ON "USE_HW_CACHES" ON)
39 +cmake_dependent_option(USE_HW_CACHES_INLINEPTX_PREFETCH "Use INLINEPTX prefetch"
40 + OFF "USE_HW_CACHES" OFF)
41 +cmake_dependent_option(USE_HW_CACHES_LIBCALL "Prefetch via library call"
42 + OFF "USE_HW_CACHES" OFF)
43 +cmake_dependent_option(USE_HW_CACHES_VOLALOAD "Prefetch via load instructions"
44 + OFF "USE_HW_CACHES" OFF)
46 +# // Use a single writeback function when using caches
47 +cmake_dependent_option(USE_HW_CACHES_SINGLEWRITEBACK ""
48 + OFF "USE_HW_CACHES" OFF)
49 +cmake_dependent_option(USE_HW_CACHES_INDWRITEBACK_LIBCALL ""
50 + ON "USE_HW_CACHES" ON)
51 +cmake_dependent_option(USE_HW_CACHES_INDWRITEBACK_INLINE ""
52 + OFF "USE_HW_CACHES" OFF)
54 +# // ---------------------------------------
55 +# // ---- CODE GENERATION CONFIGURATION ----
56 +# // ---------------------------------------
58 +# // Allow exporting of PREMized functions through external linkage (useful for
59 +# // compiling PREMized library functions).
60 +option(ULES_EXTERNAL_LINKAGE "Allow exporting of PREMized functions through external linkage (useful for compiling PREMized library functions)"
61 + OFF)
63 +# // Inlining of specialized functions
64 +option(ALWAYS_INLINE_UNSPECIALIZED "" ON)
65 +option(ALWAYS_INLINE_LOAD "" ON)
66 +option(ALWAYS_INLINE_EXECUTE "" ON)
67 +option(ALWAYS_INLINE_STORE "" ON)
69 +option(AGGRESSIVELY_INLINE_CALL_TREE "Inlining of call trees in Channel Arg Insertion" ON)
71 +# // -------------------------------------------------
72 +# // ---- ADVANCED OPTIONS ---- DEFAULTS ARE SANE ----
73 +# // -------------------------------------------------
75 +option(USE_PREM_INIT_FINI "Synchronization" ON)
77 +option(NEVER_USE_SOFTDMA "Use the new DMA-like loading of data for Load and/or Store phases" ON)
80 +option(SCHEDULE_COMPATIBLE "" OFF)
81 +option(SCHEDULE_LES "" ON)
82 +option(SCHEDULE_COMBINED "" OFF)
84 +set(NUM_THREADS_LES_IN_COMBINED 512 CACHE STRING "COMBINED schedule only: Number of threads to use for Specialized")
86 +set(CACHE_LINE_SIZE 128 CACHE STRING "The size in bytes of a cache line (128 bytes on TX1)")
88 +option(NEVER_INLINE_SYNC "Inlining of synchronization functions" OFF)
90 +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/include/Config/Options.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/Config/Options.h")
94 +++ b/HerculesCompiler/llvm-passes/include/Config/Options.h.in
97 +// ----------------------------
98 +// ---- CORE CONFIGURATION ----
99 +// ----------------------------
101 +// Number of bytes available in the SPM or cache
102 +// SPM (TX1, TX2) = 48000
103 +// CACHE TX1 CPU = 524288
104 +// CACHE TX1 GPU = 262144
105 +// CACHE TX2 GPU = 524288
106 +// CACHE TX2 CPU = 524288
107 +#cmakedefine CPU_CACHE_SIZE_DEFAULT @CPU_CACHE_SIZE_DEFAULT@
108 +#cmakedefine GPU_CACHE_SIZE_DEFAULT @GPU_CACHE_SIZE_DEFAULT@
109 +#cmakedefine GPU_SCRATCHPAD_SIZE_DEFAULT @GPU_SCRATCHPAD_SIZE_DEFAULT@
111 +// Use hardware cache prefetches in Specialized function, in place of SPM-based.
112 +#cmakedefine USE_HW_CACHES
114 +// Use hierarchical PREM intervals for loops.
115 +#cmakedefine HIERARCHICAL_INTERVALS
117 +// -----------------------------
118 +// ---- CACHE CONFIGURATION ----
119 +// -----------------------------
121 +// If we use hardware caches, we can either use load or prefetch instructions
122 +// to bring the data local.
123 +#cmakedefine PREFETCH_REPS @PREFETCH_REPS@
124 +#cmakedefine USE_HW_CACHES_PREFETCH
125 +#cmakedefine USE_HW_CACHES_INLINEPTX_PREFETCH
126 +#cmakedefine USE_HW_CACHES_LIBCALL
127 +#cmakedefine USE_HW_CACHES_VOLALOAD
129 +// Use a single writeback function when using caches
130 +#cmakedefine USE_HW_CACHES_SINGLEWRITEBACK
131 +#cmakedefine USE_HW_CACHES_INDWRITEBACK_LIBCALL
132 +#cmakedefine USE_HW_CACHES_INDWRITEBACK_INLINE
134 +// ---------------------------------------
135 +// ---- CODE GENERATION CONFIGURATION ----
136 +// ---------------------------------------
138 +// Allow exporting of PREMized functions through external linkage (useful for
139 +// compiling PREMized library functions).
140 +#cmakedefine ULES_EXTERNAL_LINKAGE
142 +// Inlining of specialized functions
143 +#cmakedefine ALWAYS_INLINE_UNSPECIALIZED
144 +#cmakedefine ALWAYS_INLINE_LOAD
145 +#cmakedefine ALWAYS_INLINE_EXECUTE
146 +#cmakedefine ALWAYS_INLINE_STORE
148 +// Inlining of call trees in Channel Arg Insertion
149 +#cmakedefine AGGRESSIVELY_INLINE_CALL_TREE
151 +// -------------------------------------------------
152 +// ---- ADVANCED OPTIONS ---- DEFAULTS ARE SANE ----
153 +// -------------------------------------------------
156 +#cmakedefine USE_PREM_INIT_FINI
158 +// Use the new DMA-like loading of data for Load and/or Store phases
159 +#cmakedefine NEVER_USE_SOFTDMA
162 +#cmakedefine SCHEDULE_COMPATIBLE
163 +#cmakedefine SCHEDULE_LES
164 +#cmakedefine SCHEDULE_COMBINED
166 +// COMBINED schedule only: Number of threads to use for Specialized
167 +#cmakedefine NUM_THREADS_LES_IN_COMBINED @NUM_THREADS_LES_IN_COMBINED@
169 +// The size in bytes of a cache line (128 bytes on TX1)
170 +#cmakedefine CACHE_LINE_SIZE @CACHE_LINE_SIZE@
172 +// Inlining of synchronization functions
173 +#cmakedefine NEVER_INLINE_SYNC
175 +// -------------------------------------------------------------------------- //
176 +// ---- DO NOT CHANGE BELOW THIS LINE ---- AUTOGENERATION BASED ON ABOVE ---- //
177 +// -------------------------------------------------------------------------- //
179 +// Check that schedule is sane: exactly one of SCHEDULE_COMPATIBLE, SCHEDULE_LES and SCHEDULE_COMBINED must be defined.
180 +#if defined(SCHEDULE_COMPATIBLE) && \
181 + (defined(SCHEDULE_LES) || defined(SCHEDULE_COMBINED))
182 +#error Multiple schedules defined!
183 +#endif
184 +#if defined(SCHEDULE_LES) && \
185 + (defined(SCHEDULE_COMPATIBLE) || defined(SCHEDULE_COMBINED))
186 +#error Multiple schedules defined!
187 +#endif
188 +#if defined(SCHEDULE_COMBINED) && \
189 + (defined(SCHEDULE_LES) || defined(SCHEDULE_COMPATIBLE))
190 +#error Multiple schedules defined!
191 +#endif
192 +#if !defined(SCHEDULE_COMPATIBLE) && !defined(SCHEDULE_LES) && \
193 + !defined(SCHEDULE_COMBINED)
194 +#error No schedule defined!
195 +#endif
197 +// Check that the HW CACHE config is sane (only evaluated when USE_HW_CACHES is defined).
198 +#ifdef USE_HW_CACHES
199 +#define DONT_SPECIALIZE_EXECUTE
200 +//# if defined(USE_HW_CACHES_PREFETCH) && defined(USE_HW_CACHES_VOLALOAD)
201 +//# error Using both volatile loads and prefetches for HW CACHE mode.
202 +//# endif
203 +#if !defined(USE_HW_CACHES_PREFETCH) && !defined(USE_HW_CACHES_VOLALOAD) && \
204 + !defined(USE_HW_CACHES_INLINEPTX_PREFETCH) && \
205 + !defined(USE_HW_CACHES_LIBCALL)
206 +#error No policy for HW caches defined!
207 +#endif
208 +#if defined(USE_HW_CACHES_INLINEPTX_PREFETCH) && \
209 + (defined(USE_HW_CACHES_PREFETCH) || defined(USE_HW_CACHES_VOLALOAD))
210 +#error Multiple cache policies defined!
211 +#endif
212 +#if defined(USE_HW_CACHES_PREFETCH) && \
213 + (defined(USE_HW_CACHES_INLINEPTX_PREFETCH) || \
214 + defined(USE_HW_CACHES_VOLALOAD))
215 +#error Multiple cache policies defined!
216 +#endif
217 +#if defined(USE_HW_CACHES_VOLALOAD) && \
218 + (defined(USE_HW_CACHES_PREFETCH) || \
219 + defined(USE_HW_CACHES_INLINEPTX_PREFETCH))
220 +#error Multiple cache policies defined!
221 +#endif
223 +#if defined(USE_HW_CACHES_SINGLEWRITEBACK) && \
224 + defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) && \
225 + defined(USE_HW_CACHES_INDWRITEBACK_INLINE)
226 +#error Multiple cache writeback policies!
227 +#elif defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) && \
228 + defined(USE_HW_CACHES_INDWRITEBACK_INLINE)
229 +#error Multiple cache writeback policies!
230 +#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) && \
231 + defined(USE_HW_CACHES_INDWRITEBACK_INLINE)
232 +#error Multiple cache writeback policies!
233 +#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) && \
234 + defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL)
235 +#error Multiple cache writeback policies!
236 +#endif
238 +#endif // USE_HW_CACHES
240 +#ifdef EMPTY_COMPUTE_PHASE
241 +#warning Empty compute phase!