]> rtime.felk.cvut.cz Git - l4.git/blob - l4/pkg/valgrind/src/valgrind-3.6.0-svn/cachegrind/cg_sim.c
update
[l4.git] / l4 / pkg / valgrind / src / valgrind-3.6.0-svn / cachegrind / cg_sim.c
1
2 /*--------------------------------------------------------------------*/
3 /*--- Cache simulation                                    cg_sim.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7    This file is part of Cachegrind, a Valgrind tool for cache
8    profiling programs.
9
10    Copyright (C) 2002-2010 Nicholas Nethercote
11       njn@valgrind.org
12
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27
28    The GNU General Public License is contained in the file COPYING.
29 */
30
31 /* Notes:
32   - simulates a write-allocate cache
33   - (block --> set) hash function uses simple bit selection
34   - handling of references straddling two cache blocks:
35       - counts as only one cache access (not two)
36       - both blocks hit                  --> one hit
37       - one block hits, the other misses --> one miss
38       - both blocks miss                 --> one miss (not two)
39 */
40
41 typedef struct {
42    Int          size;                   /* bytes */
43    Int          assoc;
44    Int          line_size;              /* bytes */
45    Int          sets;
46    Int          sets_min_1;
47    Int          line_size_bits;
48    Int          tag_shift;
49    Char         desc_line[128];
50    UWord*       tags;
51 } cache_t2;
52
53 /* By this point, the size/assoc/line_size has been checked. */
54 static void cachesim_initcache(cache_t config, cache_t2* c)
55 {
56    Int i;
57
58    c->size      = config.size;
59    c->assoc     = config.assoc;
60    c->line_size = config.line_size;
61
62    c->sets           = (c->size / c->line_size) / c->assoc;
63    c->sets_min_1     = c->sets - 1;
64    c->line_size_bits = VG_(log2)(c->line_size);
65    c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
66
67    if (c->assoc == 1) {
68       VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", 
69                                  c->size, c->line_size);
70    } else {
71       VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
72                                  c->size, c->line_size, c->assoc);
73    }
74
75    c->tags = VG_(malloc)("cg.sim.ci.1",
76                          sizeof(UWord) * c->sets * c->assoc);
77
78    for (i = 0; i < c->sets * c->assoc; i++)
79       c->tags[i] = 0;
80 }
81
82 /* This is done as a macro rather than by passing in the cache_t2 as an 
83  * arg because it slows things down by a small amount (3-5%) due to all 
84  * that extra indirection. */
85
86 #define CACHESIM(L, MISS_TREATMENT)                                         \
87 /* The cache and associated bits and pieces. */                             \
88 static cache_t2 L;                                                          \
89                                                                             \
90 static void cachesim_##L##_initcache(cache_t config)                        \
91 {                                                                           \
92     cachesim_initcache(config, &L);                                         \
93 }                                                                           \
94                                                                             \
95 /* This attribute forces GCC to inline this function, even though it's */   \
96 /* bigger than its usual limit.  Inlining gains around 5--10% speedup. */   \
97 __attribute__((always_inline))                                              \
98 static __inline__                                                           \
99 void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *mL)         \
100 {                                                                           \
101    UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);          \
102    UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);          \
103    UWord tag  = a >> L.tag_shift;                                           \
104    UWord tag2;                                                              \
105    Int i, j;                                                                \
106    Bool is_miss = False;                                                    \
107    UWord* set;                                                              \
108                                                                             \
109    /* First case: word entirely within line. */                             \
110    if (set1 == set2) {                                                      \
111                                                                             \
112       set = &(L.tags[set1 * L.assoc]);                                      \
113                                                                             \
114       /* This loop is unrolled for just the first case, which is the most */\
115       /* common.  We can't unroll any further because it would screw up   */\
116       /* if we have a direct-mapped (1-way) cache.                        */\
117       if (tag == set[0]) {                                                  \
118          return;                                                            \
119       }                                                                     \
120       /* If the tag is one other than the MRU, move it into the MRU spot  */\
121       /* and shuffle the rest down.                                       */\
122       for (i = 1; i < L.assoc; i++) {                                       \
123          if (tag == set[i]) {                                               \
124             for (j = i; j > 0; j--) {                                       \
125                set[j] = set[j - 1];                                         \
126             }                                                               \
127             set[0] = tag;                                                   \
128             return;                                                         \
129          }                                                                  \
130       }                                                                     \
131                                                                             \
132       /* A miss;  install this tag as MRU, shuffle rest down. */            \
133       for (j = L.assoc - 1; j > 0; j--) {                                   \
134          set[j] = set[j - 1];                                               \
135       }                                                                     \
136       set[0] = tag;                                                         \
137       MISS_TREATMENT;                                                       \
138       return;                                                               \
139                                                                             \
140    /* Second case: word straddles two lines. */                             \
141    /* Nb: this is a fast way of doing ((set1+1) % L.sets) */                \
142    } else if (((set1 + 1) & (L.sets-1)) == set2) {                          \
143       set = &(L.tags[set1 * L.assoc]);                                      \
144       if (tag == set[0]) {                                                  \
145          goto block2;                                                       \
146       }                                                                     \
147       for (i = 1; i < L.assoc; i++) {                                       \
148          if (tag == set[i]) {                                               \
149             for (j = i; j > 0; j--) {                                       \
150                set[j] = set[j - 1];                                         \
151             }                                                               \
152             set[0] = tag;                                                   \
153             goto block2;                                                    \
154          }                                                                  \
155       }                                                                     \
156       for (j = L.assoc - 1; j > 0; j--) {                                   \
157          set[j] = set[j - 1];                                               \
158       }                                                                     \
159       set[0] = tag;                                                         \
160       is_miss = True;                                                       \
161 block2:                                                                     \
162       set = &(L.tags[set2 * L.assoc]);                                      \
163       tag2 = (a+size-1) >> L.tag_shift;                                     \
164       if (tag2 == set[0]) {                                                 \
165          goto miss_treatment;                                               \
166       }                                                                     \
167       for (i = 1; i < L.assoc; i++) {                                       \
168          if (tag2 == set[i]) {                                              \
169             for (j = i; j > 0; j--) {                                       \
170                set[j] = set[j - 1];                                         \
171             }                                                               \
172             set[0] = tag2;                                                  \
173             goto miss_treatment;                                            \
174          }                                                                  \
175       }                                                                     \
176       for (j = L.assoc - 1; j > 0; j--) {                                   \
177          set[j] = set[j - 1];                                               \
178       }                                                                     \
179       set[0] = tag2;                                                        \
180       is_miss = True;                                                       \
181 miss_treatment:                                                             \
182       if (is_miss) { MISS_TREATMENT; }                                      \
183                                                                             \
184    } else {                                                                 \
185        VG_(printf)("addr: %lx  size: %u  sets: %d %d", a, size, set1, set2);\
186        VG_(tool_panic)("item straddles more than two cache sets");          \
187    }                                                                        \
188    return;                                                                  \
189 }
190
191 CACHESIM(LL, (*mL)++ );
192 CACHESIM(I1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
193 CACHESIM(D1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
194
195 /*--------------------------------------------------------------------*/
196 /*--- end                                                 cg_sim.c ---*/
197 /*--------------------------------------------------------------------*/
198