]> rtime.felk.cvut.cz Git - l4.git/blobdiff - l4/pkg/valgrind/src/valgrind-3.6.0-svn/cachegrind/cg_main.c
update
[l4.git] / l4 / pkg / valgrind / src / valgrind-3.6.0-svn / cachegrind / cg_main.c
index cb0af568218a7e68a1adcc2e667a4e545d12737a..ecdd706f0100918c1eff28282651dec6ffe6e0ff 100644 (file)
@@ -77,7 +77,7 @@ typedef
    struct {
       ULong a;  /* total # memory accesses of this kind */
       ULong m1; /* misses in the first level cache */
-      ULong m2; /* misses in the second level cache */
+      ULong mL; /* misses in the LL cache (the level previously named L2) */
    }
    CacheCC;
 
@@ -268,13 +268,13 @@ static LineCC* get_lineCC(Addr origAddr)
       lineCC->loc.line = loc.line;
       lineCC->Ir.a     = 0;
       lineCC->Ir.m1    = 0;
-      lineCC->Ir.m2    = 0;
+      lineCC->Ir.mL    = 0;
       lineCC->Dr.a     = 0;
       lineCC->Dr.m1    = 0;
-      lineCC->Dr.m2    = 0;
+      lineCC->Dr.mL    = 0;
       lineCC->Dw.a     = 0;
       lineCC->Dw.m1    = 0;
-      lineCC->Dw.m2    = 0;
+      lineCC->Dw.mL    = 0;
       lineCC->Bc.b     = 0;
       lineCC->Bc.mp    = 0;
       lineCC->Bi.b     = 0;
@@ -289,13 +289,37 @@ static LineCC* get_lineCC(Addr origAddr)
 /*--- Cache simulation functions                           ---*/
 /*------------------------------------------------------------*/
 
+// Only used with --cache-sim=no.  Bumps Ir.a for one instruction; no cache simulation is run.
+static VG_REGPARM(1)
+void log_1I(InstrInfo* n)
+{
+   n->parent->Ir.a++;   // access count only; the miss counters are not touched
+}
+
+// Only used with --cache-sim=no.  Bumps Ir.a for two instructions in one call; no cache simulation.
+static VG_REGPARM(2)
+void log_2I(InstrInfo* n, InstrInfo* n2)
+{
+   n->parent->Ir.a++;    // first instruction
+   n2->parent->Ir.a++;   // second instruction
+}
+
+// Only used with --cache-sim=no.  Bumps Ir.a for three instructions in one call; no cache simulation.
+static VG_REGPARM(3)
+void log_3I(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
+{
+   n->parent->Ir.a++;    // first instruction
+   n2->parent->Ir.a++;   // second instruction
+   n3->parent->Ir.a++;   // third instruction
+}
+
 static VG_REGPARM(1)
 void log_1I_0D_cache_access(InstrInfo* n)
 {
    //VG_(printf)("1I_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
    //             n, n->instr_addr, n->instr_len);
    cachesim_I1_doref(n->instr_addr, n->instr_len, 
-                     &n->parent->Ir.m1, &n->parent->Ir.m2);
+                     &n->parent->Ir.m1, &n->parent->Ir.mL); /* I1 and LL miss counters; presumably bumped by the simulator -- TODO confirm */
    n->parent->Ir.a++;
 }
 
@@ -307,10 +331,10 @@ void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2)
    //            n,  n->instr_addr,  n->instr_len,
    //            n2, n2->instr_addr, n2->instr_len);
    cachesim_I1_doref(n->instr_addr, n->instr_len, 
-                     &n->parent->Ir.m1, &n->parent->Ir.m2);
+                     &n->parent->Ir.m1, &n->parent->Ir.mL);
    n->parent->Ir.a++;
    cachesim_I1_doref(n2->instr_addr, n2->instr_len, 
-                     &n2->parent->Ir.m1, &n2->parent->Ir.m2);
+                     &n2->parent->Ir.m1, &n2->parent->Ir.mL);
    n2->parent->Ir.a++;
 }
 
@@ -324,13 +348,13 @@ void log_3I_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
    //            n2, n2->instr_addr, n2->instr_len,
    //            n3, n3->instr_addr, n3->instr_len);
    cachesim_I1_doref(n->instr_addr, n->instr_len, 
-                     &n->parent->Ir.m1, &n->parent->Ir.m2);
+                     &n->parent->Ir.m1, &n->parent->Ir.mL);
    n->parent->Ir.a++;
    cachesim_I1_doref(n2->instr_addr, n2->instr_len, 
-                     &n2->parent->Ir.m1, &n2->parent->Ir.m2);
+                     &n2->parent->Ir.m1, &n2->parent->Ir.mL);
    n2->parent->Ir.a++;
    cachesim_I1_doref(n3->instr_addr, n3->instr_len, 
-                     &n3->parent->Ir.m1, &n3->parent->Ir.m2);
+                     &n3->parent->Ir.m1, &n3->parent->Ir.mL);
    n3->parent->Ir.a++;
 }
 
@@ -341,11 +365,11 @@ void log_1I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    //            "                               daddr=0x%010lx,  dsize=%lu\n",
    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
    cachesim_I1_doref(n->instr_addr, n->instr_len, 
-                     &n->parent->Ir.m1, &n->parent->Ir.m2);
+                     &n->parent->Ir.m1, &n->parent->Ir.mL);
    n->parent->Ir.a++;
 
    cachesim_D1_doref(data_addr, data_size, 
-                     &n->parent->Dr.m1, &n->parent->Dr.m2);
+                     &n->parent->Dr.m1, &n->parent->Dr.mL);
    n->parent->Dr.a++;
 }
 
@@ -356,11 +380,11 @@ void log_1I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    //            "                               daddr=0x%010lx,  dsize=%lu\n",
    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
    cachesim_I1_doref(n->instr_addr, n->instr_len, 
-                     &n->parent->Ir.m1, &n->parent->Ir.m2);
+                     &n->parent->Ir.m1, &n->parent->Ir.mL);
    n->parent->Ir.a++;
 
    cachesim_D1_doref(data_addr, data_size, 
-                     &n->parent->Dw.m1, &n->parent->Dw.m2);
+                     &n->parent->Dw.m1, &n->parent->Dw.mL);
    n->parent->Dw.a++;
 }
 
@@ -370,7 +394,7 @@ void log_0I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    //VG_(printf)("0I_1Dr:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
    //            n, data_addr, data_size);
    cachesim_D1_doref(data_addr, data_size, 
-                     &n->parent->Dr.m1, &n->parent->Dr.m2);
+                     &n->parent->Dr.m1, &n->parent->Dr.mL);
    n->parent->Dr.a++;
 }
 
@@ -380,7 +404,7 @@ void log_0I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    //VG_(printf)("0I_1Dw:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
    //            n, data_addr, data_size);
    cachesim_D1_doref(data_addr, data_size, 
-                     &n->parent->Dw.m1, &n->parent->Dw.m2);
+                     &n->parent->Dw.m1, &n->parent->Dw.mL);
    n->parent->Dw.a++;
 }
 
@@ -708,8 +732,13 @@ static void flushEvents ( CgState* cgs )
             else
             if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir)
             {
-               helperName = "log_3I_0D_cache_access";
-               helperAddr = &log_3I_0D_cache_access;
+               if (clo_cache_sim) {
+                  helperName = "log_3I_0D_cache_access";
+                  helperAddr = &log_3I_0D_cache_access;
+               } else {
+                  helperName = "log_3I";
+                  helperAddr = &log_3I;
+               }
                argv = mkIRExprVec_3( i_node_expr, 
                                      mkIRExpr_HWord( (HWord)ev2->inode ), 
                                      mkIRExpr_HWord( (HWord)ev3->inode ) );
@@ -719,8 +748,13 @@ static void flushEvents ( CgState* cgs )
             /* Merge an Ir with one following Ir. */
             else
             if (ev2 && ev2->tag == Ev_Ir) {
-               helperName = "log_2I_0D_cache_access";
-               helperAddr = &log_2I_0D_cache_access;
+               if (clo_cache_sim) {
+                  helperName = "log_2I_0D_cache_access";
+                  helperAddr = &log_2I_0D_cache_access;
+               } else {
+                  helperName = "log_2I";
+                  helperAddr = &log_2I;
+               }
                argv = mkIRExprVec_2( i_node_expr,
                                      mkIRExpr_HWord( (HWord)ev2->inode ) );
                regparms = 2;
@@ -728,8 +762,13 @@ static void flushEvents ( CgState* cgs )
             }
             /* No merging possible; emit as-is. */
             else {
-               helperName = "log_1I_0D_cache_access";
-               helperAddr = &log_1I_0D_cache_access;
+               if (clo_cache_sim) {
+                  helperName = "log_1I_0D_cache_access";
+                  helperAddr = &log_1I_0D_cache_access;
+               } else {
+                  helperName = "log_1I";
+                  helperAddr = &log_1I;
+               }
                argv = mkIRExprVec_1( i_node_expr );
                regparms = 1;
                i++;
@@ -1070,60 +1109,66 @@ IRSB* cg_instrument ( VgCallbackClosure* closure,
          }
 
          case Ist_Exit: {
-            /* Stuff to widen the guard expression to a host word, so
-               we can pass it to the branch predictor simulation
-               functions easily. */
-            Bool     inverted;
-            Addr64   nia, sea;
-            IRConst* dst;
-            IRType   tyW    = hWordTy;
-            IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
-            IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
-            IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
-            IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
-            IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
-            IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
-                                           : IRExpr_Const(IRConst_U64(1));
-
-            /* First we need to figure out whether the side exit got
-               inverted by the ir optimiser.  To do that, figure out
-               the next (fallthrough) instruction's address and the
-               side exit address and see if they are the same. */
-            nia = cia + (Addr64)isize;
-            if (tyW == Ity_I32) 
-               nia &= 0xFFFFFFFFULL;
-
-            /* Side exit address */
-            dst = st->Ist.Exit.dst;
-            if (tyW == Ity_I32) {
-               tl_assert(dst->tag == Ico_U32);
-               sea = (Addr64)(UInt)dst->Ico.U32;
-            } else {
-               tl_assert(tyW == Ity_I64);
-               tl_assert(dst->tag == Ico_U64);
-               sea = dst->Ico.U64;
+            // call branch predictor only if this is a branch in guest code
+            if ( (st->Ist.Exit.jk == Ijk_Boring) ||
+                 (st->Ist.Exit.jk == Ijk_Call) ||
+                 (st->Ist.Exit.jk == Ijk_Ret) )
+            {
+               /* Stuff to widen the guard expression to a host word, so
+                  we can pass it to the branch predictor simulation
+                  functions easily. */
+               Bool     inverted;
+               Addr64   nia, sea;
+               IRConst* dst;
+               IRType   tyW    = hWordTy;
+               IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
+               IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
+               IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
+               IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
+               IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
+               IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
+                                              : IRExpr_Const(IRConst_U64(1));
+
+               /* First we need to figure out whether the side exit got
+                  inverted by the ir optimiser.  To do that, figure out
+                  the next (fallthrough) instruction's address and the
+                  side exit address and see if they are the same. */
+               nia = cia + (Addr64)isize;
+               if (tyW == Ity_I32)
+                  nia &= 0xFFFFFFFFULL;
+
+               /* Side exit address */
+               dst = st->Ist.Exit.dst;
+               if (tyW == Ity_I32) {
+                  tl_assert(dst->tag == Ico_U32);
+                  sea = (Addr64)(UInt)dst->Ico.U32;
+               } else {
+                  tl_assert(tyW == Ity_I64);
+                  tl_assert(dst->tag == Ico_U64);
+                  sea = dst->Ico.U64;
+               }
+
+               inverted = nia == sea;
+
+               /* Widen the guard expression. */
+               addStmtToIRSB( cgs.sbOut,
+                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
+               addStmtToIRSB( cgs.sbOut,
+                              IRStmt_WrTmp( guardW,
+                                            IRExpr_Unop(widen,
+                                                        IRExpr_RdTmp(guard1))) );
+               /* If the exit is inverted, invert the sense of the guard. */
+               addStmtToIRSB(
+                     cgs.sbOut,
+                     IRStmt_WrTmp(
+                           guard,
+                           inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
+                                    : IRExpr_RdTmp(guardW)
+                              ));
+               /* And post the event. */
+               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
             }
 
-            inverted = nia == sea;
-
-            /* Widen the guard expression. */
-            addStmtToIRSB( cgs.sbOut, 
-                           IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
-            addStmtToIRSB( cgs.sbOut,
-                           IRStmt_WrTmp( guardW,
-                                         IRExpr_Unop(widen, 
-                                                     IRExpr_RdTmp(guard1))) );
-            /* If the exit is inverted, invert the sense of the guard. */
-            addStmtToIRSB( 
-               cgs.sbOut,
-               IRStmt_WrTmp( 
-                  guard,
-                  inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
-                           : IRExpr_RdTmp(guardW) 
-               ));
-            /* And post the event. */
-            addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
-
             /* We may never reach the next statement, so need to flush
                all outstanding transactions now. */
             flushEvents( &cgs );
@@ -1147,7 +1192,7 @@ IRSB* cg_instrument ( VgCallbackClosure* closure,
    /* Deal with branches to unknown destinations.  Except ignore ones
       which are function returns as we assume the return stack
       predictor never mispredicts. */
-   if (sbIn->jumpkind == Ijk_Boring) {
+   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
       if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
       switch (sbIn->next->tag) {
          case Iex_Const: 
@@ -1189,7 +1234,7 @@ IRSB* cg_instrument ( VgCallbackClosure* closure,
 
 static cache_t clo_I1_cache = UNDEFINED_CACHE;
 static cache_t clo_D1_cache = UNDEFINED_CACHE;
-static cache_t clo_L2_cache = UNDEFINED_CACHE;
+static cache_t clo_LL_cache = UNDEFINED_CACHE;
 
 // Checks cache config is ok.  Returns NULL if ok, or a pointer to an error
 // string otherwise.
@@ -1228,7 +1273,7 @@ static Char* check_cache(cache_t* cache)
 }
 
 static 
-void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc)
 {
 #define DEFINED(L)   (-1 != L.size  || -1 != L.assoc || -1 != L.line_size)
 
@@ -1238,22 +1283,22 @@ void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
    Bool all_caches_clo_defined =
       (DEFINED(clo_I1_cache) &&
        DEFINED(clo_D1_cache) &&
-       DEFINED(clo_L2_cache));
+       DEFINED(clo_LL_cache));
 
    // Set the cache config (using auto-detection, if supported by the
    // architecture).
-   VG_(configure_caches)( I1c, D1c, L2c, all_caches_clo_defined );
+   VG_(configure_caches)( I1c, D1c, LLc, all_caches_clo_defined );
 
    // Check the default/auto-detected values.
    checkRes = check_cache(I1c);  tl_assert(!checkRes);
    checkRes = check_cache(D1c);  tl_assert(!checkRes);
-   checkRes = check_cache(L2c);  tl_assert(!checkRes);
+   checkRes = check_cache(LLc);  tl_assert(!checkRes);
 
    // Then replace with any defined on the command line.  (Already checked in
    // parse_cache_opt().)
    if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
    if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
-   if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }
+   if (DEFINED(clo_LL_cache)) { *LLc = clo_LL_cache; }
 
    if (VG_(clo_verbosity) >= 2) {
       VG_(umsg)("Cache configuration used:\n");
@@ -1261,8 +1306,8 @@ void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
                 I1c->size, I1c->assoc, I1c->line_size);
       VG_(umsg)("  D1: %dB, %d-way, %dB lines\n",
                 D1c->size, D1c->assoc, D1c->line_size);
-      VG_(umsg)("  L2: %dB, %d-way, %dB lines\n",
-                L2c->size, L2c->assoc, L2c->line_size);
+      VG_(umsg)("  LL: %dB, %d-way, %dB lines\n",
+                LLc->size, LLc->assoc, LLc->line_size);
    }
-#undef CMD_LINE_DEFINED
+#undef DEFINED   /* pairs with the #define DEFINED(L) at the top of this function */
 }
@@ -1309,12 +1354,12 @@ static void fprint_CC_table_and_calc_totals(void)
       VG_(free)(cachegrind_out_file);
    }
 
-   // "desc:" lines (giving I1/D1/L2 cache configuration).  The spaces after
+   // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
    // the 2nd colon makes cg_annotate's output look nicer.
    VG_(sprintf)(buf, "desc: I1 cache:         %s\n"
                      "desc: D1 cache:         %s\n"
-                     "desc: L2 cache:         %s\n",
-                     I1.desc_line, D1.desc_line, L2.desc_line);
+                     "desc: LL cache:         %s\n",
+                     I1.desc_line, D1.desc_line, LL.desc_line);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
 
    // "cmd:" line
@@ -1334,19 +1379,20 @@ static void fprint_CC_table_and_calc_totals(void)
    }
    // "events:" line
    if (clo_cache_sim && clo_branch_sim) {
-      VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
+      VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                                   "Bc Bcm Bi Bim\n");
    }
    else if (clo_cache_sim && !clo_branch_sim) {
-      VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
+      VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                                   "\n");
    }
    else if (!clo_cache_sim && clo_branch_sim) {
       VG_(sprintf)(buf, "\nevents: Ir "
                                   "Bc Bcm Bi Bim\n");
    }
-   else
-      tl_assert(0); /* can't happen */
+   else {
+      VG_(sprintf)(buf, "\nevents: Ir\n");
+   }
 
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
 
@@ -1384,9 +1430,9 @@ static void fprint_CC_table_and_calc_totals(void)
                              " %llu %llu %llu"
                              " %llu %llu %llu %llu\n",
                             lineCC->loc.line,
-                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2,
-                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
-                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2,
+                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,  /* Ir I1mr ILmr */
+                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,  /* Dr D1mr DLmr */
+                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,  /* Dw D1mw DLmw */
                             lineCC->Bc.b, lineCC->Bc.mp, 
                             lineCC->Bi.b, lineCC->Bi.mp);
       }
@@ -1395,9 +1441,9 @@ static void fprint_CC_table_and_calc_totals(void)
                              " %llu %llu %llu"
                              " %llu %llu %llu\n",
                             lineCC->loc.line,
-                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2,
-                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
-                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2);
+                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,  /* Ir I1mr ILmr */
+                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,  /* Dr D1mr DLmr */
+                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL); /* Dw D1mw DLmw */
       }
       else if (!clo_cache_sim && clo_branch_sim) {
          VG_(sprintf)(buf, "%u %llu"
@@ -1407,21 +1453,24 @@ static void fprint_CC_table_and_calc_totals(void)
                             lineCC->Bc.b, lineCC->Bc.mp, 
                             lineCC->Bi.b, lineCC->Bi.mp);
       }
-      else
-         tl_assert(0);
+      else {
+         VG_(sprintf)(buf, "%u %llu\n",
+                            lineCC->loc.line,
+                            lineCC->Ir.a);
+      }
 
       VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
 
       // Update summary stats
       Ir_total.a  += lineCC->Ir.a;
       Ir_total.m1 += lineCC->Ir.m1;
-      Ir_total.m2 += lineCC->Ir.m2;
+      Ir_total.mL += lineCC->Ir.mL;
       Dr_total.a  += lineCC->Dr.a;
       Dr_total.m1 += lineCC->Dr.m1;
-      Dr_total.m2 += lineCC->Dr.m2;
+      Dr_total.mL += lineCC->Dr.mL;
       Dw_total.a  += lineCC->Dw.a;
       Dw_total.m1 += lineCC->Dw.m1;
-      Dw_total.m2 += lineCC->Dw.m2;
+      Dw_total.mL += lineCC->Dw.mL;
       Bc_total.b  += lineCC->Bc.b;
       Bc_total.mp += lineCC->Bc.mp;
       Bi_total.b  += lineCC->Bi.b;
@@ -1438,9 +1487,9 @@ static void fprint_CC_table_and_calc_totals(void)
                         " %llu %llu %llu"
                         " %llu %llu %llu"
                         " %llu %llu %llu %llu\n", 
-                        Ir_total.a, Ir_total.m1, Ir_total.m2,
-                        Dr_total.a, Dr_total.m1, Dr_total.m2,
-                        Dw_total.a, Dw_total.m1, Dw_total.m2,
+                        Ir_total.a, Ir_total.m1, Ir_total.mL,
+                        Dr_total.a, Dr_total.m1, Dr_total.mL,
+                        Dw_total.a, Dw_total.m1, Dw_total.mL,
                         Bc_total.b, Bc_total.mp, 
                         Bi_total.b, Bi_total.mp);
    }
@@ -1449,9 +1498,9 @@ static void fprint_CC_table_and_calc_totals(void)
                         " %llu %llu %llu"
                         " %llu %llu %llu"
                         " %llu %llu %llu\n",
-                        Ir_total.a, Ir_total.m1, Ir_total.m2,
-                        Dr_total.a, Dr_total.m1, Dr_total.m2,
-                        Dw_total.a, Dw_total.m1, Dw_total.m2);
+                        Ir_total.a, Ir_total.m1, Ir_total.mL,
+                        Dr_total.a, Dr_total.m1, Dr_total.mL,
+                        Dw_total.a, Dw_total.m1, Dw_total.mL);
    }
    else if (!clo_cache_sim && clo_branch_sim) {
       VG_(sprintf)(buf, "summary:"
@@ -1461,8 +1510,11 @@ static void fprint_CC_table_and_calc_totals(void)
                         Bc_total.b, Bc_total.mp, 
                         Bi_total.b, Bi_total.mp);
    }
-   else
-      tl_assert(0);
+   else {
+      VG_(sprintf)(buf, "summary:"
+                        " %llu\n", 
+                        Ir_total.a);
+   }
 
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
    VG_(close)(fd);
@@ -1485,14 +1537,10 @@ static void cg_fini(Int exitcode)
 
    CacheCC  D_total;
    BranchCC B_total;
-   ULong L2_total_m, L2_total_mr, L2_total_mw,
-         L2_total, L2_total_r, L2_total_w;
+   ULong LL_total_m, LL_total_mr, LL_total_mw,
+         LL_total, LL_total_r, LL_total_w;
    Int l1, l2, l3;
 
-   /* Running with both cache and branch simulation disabled is not
-      allowed (checked during command line option processing). */
-   tl_assert(clo_cache_sim || clo_branch_sim);
-
    fprint_CC_table_and_calc_totals();
 
    if (VG_(clo_verbosity) == 0) 
@@ -1517,21 +1565,21 @@ static void cg_fini(Int exitcode)
       miss numbers */
    if (clo_cache_sim) {
       VG_(umsg)(fmt, "I1  misses:   ", Ir_total.m1);
-      VG_(umsg)(fmt, "L2i misses:   ", Ir_total.m2);
+      VG_(umsg)(fmt, "LLi misses:   ", Ir_total.mL);
 
       if (0 == Ir_total.a) Ir_total.a = 1;
       VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
       VG_(umsg)("I1  miss rate: %s\n", buf1);
 
-      VG_(percentify)(Ir_total.m2, Ir_total.a, 2, l1+1, buf1);
-      VG_(umsg)("L2i miss rate: %s\n", buf1);
+      VG_(percentify)(Ir_total.mL, Ir_total.a, 2, l1+1, buf1);
+      VG_(umsg)("LLi miss rate: %s\n", buf1);
       VG_(umsg)("\n");
 
       /* D cache results.  Use the D_refs.rd and D_refs.wr values to
        * determine the width of columns 2 & 3. */
       D_total.a  = Dr_total.a  + Dw_total.a;
       D_total.m1 = Dr_total.m1 + Dw_total.m1;
-      D_total.m2 = Dr_total.m2 + Dw_total.m2;
+      D_total.mL = Dr_total.mL + Dw_total.mL;
 
       /* Make format string, getting width right for numbers */
       VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
@@ -1541,8 +1589,8 @@ static void cg_fini(Int exitcode)
                      D_total.a, Dr_total.a, Dw_total.a);
       VG_(umsg)(fmt, "D1  misses:   ",
                      D_total.m1, Dr_total.m1, Dw_total.m1);
-      VG_(umsg)(fmt, "L2d misses:   ",
-                     D_total.m2, Dr_total.m2, Dw_total.m2);
+      VG_(umsg)(fmt, "LLd misses:   ",
+                     D_total.mL, Dr_total.mL, Dw_total.mL);
 
       if (0 == D_total.a)  D_total.a = 1;
       if (0 == Dr_total.a) Dr_total.a = 1;
@@ -1552,30 +1600,30 @@ static void cg_fini(Int exitcode)
       VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
       VG_(umsg)("D1  miss rate: %s (%s     + %s  )\n", buf1, buf2,buf3);
 
-      VG_(percentify)( D_total.m2,  D_total.a, 1, l1+1, buf1);
-      VG_(percentify)(Dr_total.m2, Dr_total.a, 1, l2+1, buf2);
-      VG_(percentify)(Dw_total.m2, Dw_total.a, 1, l3+1, buf3);
-      VG_(umsg)("L2d miss rate: %s (%s     + %s  )\n", buf1, buf2,buf3);
+      VG_(percentify)( D_total.mL,  D_total.a, 1, l1+1, buf1);
+      VG_(percentify)(Dr_total.mL, Dr_total.a, 1, l2+1, buf2);
+      VG_(percentify)(Dw_total.mL, Dw_total.a, 1, l3+1, buf3);
+      VG_(umsg)("LLd miss rate: %s (%s     + %s  )\n", buf1, buf2,buf3);
       VG_(umsg)("\n");
 
-      /* L2 overall results */
+      /* LL overall results */
 
-      L2_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
-      L2_total_r = Dr_total.m1 + Ir_total.m1;
-      L2_total_w = Dw_total.m1;
-      VG_(umsg)(fmt, "L2 refs:      ",
-                     L2_total, L2_total_r, L2_total_w);
+      LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
+      LL_total_r = Dr_total.m1 + Ir_total.m1;
+      LL_total_w = Dw_total.m1;
+      VG_(umsg)(fmt, "LL refs:      ",
+                     LL_total, LL_total_r, LL_total_w);
 
-      L2_total_m  = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
-      L2_total_mr = Dr_total.m2 + Ir_total.m2;
-      L2_total_mw = Dw_total.m2;
-      VG_(umsg)(fmt, "L2 misses:    ",
-                     L2_total_m, L2_total_mr, L2_total_mw);
+      LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
+      LL_total_mr = Dr_total.mL + Ir_total.mL;
+      LL_total_mw = Dw_total.mL;
+      VG_(umsg)(fmt, "LL misses:    ",
+                     LL_total_m, LL_total_mr, LL_total_mw);
 
-      VG_(percentify)(L2_total_m,  (Ir_total.a + D_total.a),  1, l1+1, buf1);
-      VG_(percentify)(L2_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
-      VG_(percentify)(L2_total_mw, Dw_total.a,                1, l3+1, buf3);
-      VG_(umsg)("L2 miss rate:  %s (%s     + %s  )\n", buf1, buf2,buf3);
+      VG_(percentify)(LL_total_m,  (Ir_total.a + D_total.a),  1, l1+1, buf1);
+      VG_(percentify)(LL_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
+      VG_(percentify)(LL_total_mw, Dw_total.a,                1, l3+1, buf3);
+      VG_(umsg)("LL miss rate:  %s (%s     + %s  )\n", buf1, buf2,buf3);
    }
 
    /* If branch profiling is enabled, show branch overall results. */
@@ -1712,8 +1760,9 @@ static Bool cg_process_cmd_line_option(Char* arg)
       parse_cache_opt(&clo_I1_cache, arg, tmp_str);
    else if VG_STR_CLO(arg, "--D1", tmp_str)
       parse_cache_opt(&clo_D1_cache, arg, tmp_str);
-   else if VG_STR_CLO(arg, "--L2", tmp_str)
-      parse_cache_opt(&clo_L2_cache, arg, tmp_str);
+   else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
+            VG_STR_CLO(arg, "--LL", tmp_str))
+      parse_cache_opt(&clo_LL_cache, arg, tmp_str);
 
    else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
    else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
@@ -1729,7 +1778,7 @@ static void cg_print_usage(void)
    VG_(printf)(
 "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
 "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
-"    --L2=<size>,<assoc>,<line_size>  set L2 cache manually\n"
+"    --LL=<size>,<assoc>,<line_size>  set LL cache manually\n"
 "    --cache-sim=yes|no  [yes]        collect cache stats?\n"
 "    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
 "    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
@@ -1771,15 +1820,7 @@ static void cg_pre_clo_init(void)
 
 static void cg_post_clo_init(void)
 {
-   cache_t I1c, D1c, L2c; 
-
-   /* Can't disable both cache and branch profiling */
-   if ((!clo_cache_sim) && (!clo_branch_sim)) {
-      VG_(umsg)("ERROR: --cache-sim=no --branch-sim=no is not allowed.\n");
-      VG_(umsg)("You must select cache profiling, "
-                "or branch profiling, or both.\n");
-      VG_(exit)(2);
-   }
+   cache_t I1c, D1c, LLc; 
 
    CC_table =
       VG_(OSetGen_Create)(offsetof(LineCC, loc),
@@ -1797,11 +1838,11 @@ static void cg_post_clo_init(void)
                           VG_(malloc), "cg.main.cpci.3",
                           VG_(free));
 
-   configure_caches(&I1c, &D1c, &L2c);
+   configure_caches(&I1c, &D1c, &LLc);
 
    cachesim_I1_initcache(I1c);
    cachesim_D1_initcache(D1c);
-   cachesim_L2_initcache(L2c);
+   cachesim_LL_initcache(LLc);
 }
 
 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)