drivers/misc/tegra-profiler/hrt.c

   1 /*
   2  * drivers/misc/tegra-profiler/hrt.c
   3  *
   4  * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
   5  *
   6  * This program is free software; you can redistribute it and/or modify it
   7  * under the terms and conditions of the GNU General Public License,
   8  * version 2, as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13  * more details.
  14  *
  15  */
  16
  17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  18
  19 #include <linux/sched.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/slab.h>
  22 #include <linux/cpu.h>
  23 #include <linux/ptrace.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/err.h>
  26 #include <linux/nsproxy.h>
  27 #include <clocksource/arm_arch_timer.h>
  28
  29 #include <asm/cputype.h>
  30 #include <asm/irq_regs.h>
  31 #include <asm/arch_timer.h>
  32
  33 #include <linux/tegra_profiler.h>
  34
  35 #include "quadd.h"
  36 #include "hrt.h"
  37 #include "comm.h"
  38 #include "mmap.h"
  39 #include "ma.h"
  40 #include "power_clk.h"
  41 #include "tegra.h"
  42 #include "debug.h"
  43
  44 static struct quadd_hrt_ctx hrt;
  45
  46 static void
  47 read_all_sources(struct pt_regs *regs, struct task_struct *task);
  48
  49 struct hrt_event_value {
  50         int event_id;
  51         u32 value;
  52 };
  53
  54 static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
  55 {
  56         struct pt_regs *regs;
  57
  58         regs = get_irq_regs();
  59
  60         if (!hrt.active)
  61                 return HRTIMER_NORESTART;
  62
  63         qm_debug_handler_sample(regs);
  64
  65         if (regs)
  66                 read_all_sources(regs, NULL);
  67
  68         hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
  69         qm_debug_timer_forward(regs, hrt.sample_period);
  70
  71         return HRTIMER_RESTART;
  72 }
  73
  74 static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
  75 {
  76         u64 period = hrt.sample_period;
  77
  78         __hrtimer_start_range_ns(&cpu_ctx->hrtimer,
  79                                  ns_to_ktime(period), 0,
  80                                  HRTIMER_MODE_REL_PINNED, 0);
  81         qm_debug_timer_start(NULL, period);
  82 }
  83
  84 static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
  85 {
  86         hrtimer_cancel(&cpu_ctx->hrtimer);
  87         qm_debug_timer_cancel();
  88 }
  89
  90 static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
  91 {
  92         hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  93         cpu_ctx->hrtimer.function = hrtimer_handler;
  94 }
  95
  96 static inline u64 get_posix_clock_monotonic_time(void)
  97 {
  98         struct timespec ts;
  99
 100         do_posix_clock_monotonic_gettime(&ts);
 101         return timespec_to_ns(&ts);
 102 }
 103
 104 static inline u64 get_arch_time(struct timecounter *tc)
 105 {
 106         cycle_t value;
 107         const struct cyclecounter *cc = tc->cc;
 108
 109         value = cc->read(cc);
 110         return cyclecounter_cyc2ns(cc, value);
 111 }
 112
 113 u64 quadd_get_time(void)
 114 {
 115         struct timecounter *tc = hrt.tc;
 116
 117         if (tc)
 118                 return get_arch_time(tc);
 119         else
 120                 return get_posix_clock_monotonic_time();
 121 }
 122
 123 static void put_header(void)
 124 {
 125         int nr_events = 0, max_events = QUADD_MAX_COUNTERS;
 126         int events[QUADD_MAX_COUNTERS];
 127         struct quadd_record_data record;
 128         struct quadd_header_data *hdr = &record.hdr;
 129         struct quadd_parameters *param = &hrt.quadd_ctx->param;
 130         unsigned int extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
 131         struct quadd_iovec vec;
 132         struct quadd_ctx *ctx = hrt.quadd_ctx;
 133         struct quadd_event_source_interface *pmu = ctx->pmu;
 134         struct quadd_event_source_interface *pl310 = ctx->pl310;
 135
 136         record.record_type = QUADD_RECORD_TYPE_HEADER;
 137
 138         hdr->magic = QUADD_HEADER_MAGIC;
 139         hdr->version = QUADD_SAMPLES_VERSION;
 140
 141         hdr->backtrace = param->backtrace;
 142         hdr->use_freq = param->use_freq;
 143         hdr->system_wide = param->system_wide;
 144
 145         /* TODO: dynamically */
 146 #ifdef QM_DEBUG_SAMPLES_ENABLE
 147         hdr->debug_samples = 1;
 148 #else
 149         hdr->debug_samples = 0;
 150 #endif
 151
 152         hdr->freq = param->freq;
 153         hdr->ma_freq = param->ma_freq;
 154         hdr->power_rate_freq = param->power_rate_freq;
 155
 156         hdr->power_rate = hdr->power_rate_freq > 0 ? 1 : 0;
 157         hdr->get_mmap = (extra & QUADD_PARAM_EXTRA_GET_MMAP) ? 1 : 0;
 158
 159         hdr->reserved = 0;
 160         hdr->extra_length = 0;
 161
 162         if (pmu)
 163                 nr_events += pmu->get_current_events(events, max_events);
 164
 165         if (pl310)
 166                 nr_events += pl310->get_current_events(events + nr_events,
 167                                                        max_events - nr_events);
 168
 169         hdr->nr_events = nr_events;
 170
 171         vec.base = events;
 172         vec.len = nr_events * sizeof(events[0]);
 173
 174         quadd_put_sample(&record, &vec, 1);
 175 }
 176
 177 void quadd_put_sample(struct quadd_record_data *data,
 178                       struct quadd_iovec *vec, int vec_count)
 179 {
 180         struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
 181
 182         comm->put_sample(data, vec, vec_count);
 183         atomic64_inc(&hrt.counter_samples);
 184 }
 185
 186 static int get_sample_data(struct quadd_sample_data *sample,
 187                            struct pt_regs *regs,
 188                            struct task_struct *task)
 189 {
 190         unsigned int cpu, flags;
 191         struct quadd_ctx *quadd_ctx = hrt.quadd_ctx;
 192
 193         cpu = quadd_get_processor_id(regs, &flags);
 194         sample->cpu = cpu;
 195
 196         sample->lp_mode =
 197                 (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
 198         sample->thumb_mode = (flags & QUADD_CPUMODE_THUMB) ? 1 : 0;
 199         sample->user_mode = user_mode(regs) ? 1 : 0;
 200
 201         /* For security reasons, hide IPs from the kernel space. */
 202         if (!sample->user_mode && !quadd_ctx->collect_kernel_ips)
 203                 sample->ip = 0;
 204         else
 205                 sample->ip = instruction_pointer(regs);
 206
 207         sample->time = quadd_get_time();
 208         sample->reserved = 0;
 209         sample->pid = task->pid;
 210         sample->in_interrupt = in_interrupt() ? 1 : 0;
 211
 212         return 0;
 213 }
 214
 215 static int read_source(struct quadd_event_source_interface *source,
 216                        struct pt_regs *regs,
 217                        struct hrt_event_value *events_vals,
 218                        int max_events)
 219 {
 220         int nr_events, i;
 221         u32 prev_val, val, res_val;
 222         struct event_data events[QUADD_MAX_COUNTERS];
 223
 224         if (!source)
 225                 return 0;
 226
 227         max_events = min_t(int, max_events, QUADD_MAX_COUNTERS);
 228         nr_events = source->read(events, max_events);
 229
 230         for (i = 0; i < nr_events; i++) {
 231                 struct event_data *s = &events[i];
 232
 233                 prev_val = s->prev_val;
 234                 val = s->val;
 235
 236                 if (prev_val <= val)
 237                         res_val = val - prev_val;
 238                 else
 239                         res_val = QUADD_U32_MAX - prev_val + val;
 240
 241                 if (s->event_source == QUADD_EVENT_SOURCE_PL310) {
 242                         int nr_active = atomic_read(&hrt.nr_active_all_core);
 243                         if (nr_active > 1)
 244                                 res_val /= nr_active;
 245                 }
 246
 247                 events_vals[i].event_id = s->event_id;
 248                 events_vals[i].value = res_val;
 249         }
 250
 251         return nr_events;
 252 }
 253
 254 static void
 255 read_all_sources(struct pt_regs *regs, struct task_struct *task)
 256 {
 257         u32 state, extra_data = 0;
 258         int i, vec_idx = 0, bt_size = 0;
 259         int nr_events = 0, nr_positive_events = 0;
 260         struct pt_regs *user_regs;
 261         struct quadd_iovec vec[5];
 262         struct hrt_event_value events[QUADD_MAX_COUNTERS];
 263         u32 events_extra[QUADD_MAX_COUNTERS];
 264
 265         struct quadd_record_data record_data;
 266         struct quadd_sample_data *s = &record_data.sample;
 267
 268         struct quadd_ctx *ctx = hrt.quadd_ctx;
 269         struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 270         struct quadd_callchain *cc = &cpu_ctx->cc;
 271
 272         if (!regs)
 273                 return;
 274
 275         if (atomic_read(&cpu_ctx->nr_active) == 0)
 276                 return;
 277
 278         if (!task)
 279                 task = current;
 280
 281         rcu_read_lock();
 282         if (!task_nsproxy(task)) {
 283                 rcu_read_unlock();
 284                 return;
 285         }
 286         rcu_read_unlock();
 287
 288         if (ctx->pmu && ctx->pmu_info.active)
 289                 nr_events += read_source(ctx->pmu, regs,
 290                                          events, QUADD_MAX_COUNTERS);
 291
 292         if (ctx->pl310 && ctx->pl310_info.active)
 293                 nr_events += read_source(ctx->pl310, regs,
 294                                          events + nr_events,
 295                                          QUADD_MAX_COUNTERS - nr_events);
 296
 297         if (!nr_events)
 298                 return;
 299
 300         if (user_mode(regs))
 301                 user_regs = regs;
 302         else
 303                 user_regs = current_pt_regs();
 304
 305         if (get_sample_data(s, regs, task))
 306                 return;
 307
 308         if (cc->cs_64)
 309                 extra_data |= QUADD_SED_IP64;
 310
 311         vec[vec_idx].base = &extra_data;
 312         vec[vec_idx].len = sizeof(extra_data);
 313         vec_idx++;
 314
 315         s->reserved = 0;
 316         cc->unw_method = QUADD_URC_SUCCESS;
 317
 318         if (ctx->param.backtrace) {
 319                 bt_size = quadd_get_user_callchain(user_regs, cc, ctx, task);
 320
 321                 if (!bt_size && !user_mode(regs)) {
 322                         unsigned long pc = instruction_pointer(user_regs);
 323
 324                         cc->nr = 0;
 325 #ifdef CONFIG_ARM64
 326                         cc->cs_64 = compat_user_mode(user_regs) ? 0 : 1;
 327 #else
 328                         cc->cs_64 = 0;
 329 #endif
 330                         bt_size += quadd_callchain_store(cc, pc,
 331                                                          QUADD_UNW_TYPE_KCTX);
 332                 }
 333
 334                 if (bt_size > 0) {
 335                         int ip_size = cc->cs_64 ? sizeof(u64) : sizeof(u32);
 336                         int nr_types = DIV_ROUND_UP(bt_size, 8);
 337
 338                         vec[vec_idx].base = cc->cs_64 ?
 339                                 (void *)cc->ip_64 : (void *)cc->ip_32;
 340                         vec[vec_idx].len = bt_size * ip_size;
 341                         vec_idx++;
 342
 343                         vec[vec_idx].base = cc->types;
 344                         vec[vec_idx].len = nr_types * sizeof(cc->types[0]);
 345                         vec_idx++;
 346                 }
 347
 348                 extra_data |= cc->unw_method << QUADD_SED_UNW_METHOD_SHIFT;
 349                 s->reserved |= cc->unw_rc << QUADD_SAMPLE_URC_SHIFT;
 350         }
 351         s->callchain_nr = bt_size;
 352
 353         record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
 354
 355         s->events_flags = 0;
 356         for (i = 0; i < nr_events; i++) {
 357                 u32 value = events[i].value;
 358                 if (value > 0) {
 359                         s->events_flags |= 1 << i;
 360                         events_extra[nr_positive_events++] = value;
 361                 }
 362         }
 363
 364         if (nr_positive_events == 0)
 365                 return;
 366
 367         vec[vec_idx].base = events_extra;
 368         vec[vec_idx].len = nr_positive_events * sizeof(events_extra[0]);
 369         vec_idx++;
 370
 371         state = task->state;
 372         if (state) {
 373                 s->state = 1;
 374                 vec[vec_idx].base = &state;
 375                 vec[vec_idx].len = sizeof(state);
 376                 vec_idx++;
 377         } else {
 378                 s->state = 0;
 379         }
 380
 381         quadd_put_sample(&record_data, vec, vec_idx);
 382 }
 383
 384 static inline int
 385 is_profile_process(struct task_struct *task)
 386 {
 387         int i;
 388         pid_t pid, profile_pid;
 389         struct quadd_ctx *ctx = hrt.quadd_ctx;
 390
 391         if (!task)
 392                 return 0;
 393
 394         pid = task->tgid;
 395
 396         for (i = 0; i < ctx->param.nr_pids; i++) {
 397                 profile_pid = ctx->param.pids[i];
 398                 if (profile_pid == pid)
 399                         return 1;
 400         }
 401         return 0;
 402 }
 403
 404 static int
 405 add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
 406 {
 407         struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
 408
 409         if (t_data->pid > 0 ||
 410                 atomic_read(&cpu_ctx->nr_active) > 0) {
 411                 pr_warn_once("Warning for thread: %d\n", (int)pid);
 412                 return 0;
 413         }
 414
 415         t_data->pid = pid;
 416         t_data->tgid = tgid;
 417         return 1;
 418 }
 419
 420 static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
 421 {
 422         struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
 423
 424         if (t_data->pid < 0)
 425                 return 0;
 426
 427         if (t_data->pid == pid) {
 428                 t_data->pid = -1;
 429                 t_data->tgid = -1;
 430                 return 1;
 431         }
 432
 433         pr_warn_once("Warning for thread: %d\n", (int)pid);
 434         return 0;
 435 }
 436
 437 void __quadd_task_sched_in(struct task_struct *prev,
 438                            struct task_struct *task)
 439 {
 440         struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 441         struct quadd_ctx *ctx = hrt.quadd_ctx;
 442         struct event_data events[QUADD_MAX_COUNTERS];
 443         /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
 444
 445         if (likely(!hrt.active))
 446                 return;
 447 /*
 448         if (__ratelimit(&ratelimit_state))
 449                 pr_info("sch_in, cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n",
 450                         smp_processor_id(), (unsigned int)prev->pid,
 451                         (unsigned int)prev->tgid, (unsigned int)task->pid,
 452                         (unsigned int)task->tgid);
 453 */
 454
 455         if (is_profile_process(task)) {
 456                 add_active_thread(cpu_ctx, task->pid, task->tgid);
 457                 atomic_inc(&cpu_ctx->nr_active);
 458
 459                 if (atomic_read(&cpu_ctx->nr_active) == 1) {
 460                         if (ctx->pmu)
 461                                 ctx->pmu->start();
 462
 463                         if (ctx->pl310)
 464                                 ctx->pl310->read(events, 1);
 465
 466                         start_hrtimer(cpu_ctx);
 467                         atomic_inc(&hrt.nr_active_all_core);
 468                 }
 469         }
 470 }
 471
 472 void __quadd_task_sched_out(struct task_struct *prev,
 473                             struct task_struct *next)
 474 {
 475         int n;
 476         struct pt_regs *user_regs;
 477         struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 478         struct quadd_ctx *ctx = hrt.quadd_ctx;
 479         /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
 480
 481         if (likely(!hrt.active))
 482                 return;
 483 /*
 484         if (__ratelimit(&ratelimit_state))
 485                 pr_info("sch_out: cpu: %d, prev: %u (%u) \t--> next: %u (%u)\n",
 486                         smp_processor_id(), (unsigned int)prev->pid,
 487                         (unsigned int)prev->tgid, (unsigned int)next->pid,
 488                         (unsigned int)next->tgid);
 489 */
 490
 491         if (is_profile_process(prev)) {
 492                 user_regs = task_pt_regs(prev);
 493                 if (user_regs)
 494                         read_all_sources(user_regs, prev);
 495
 496                 n = remove_active_thread(cpu_ctx, prev->pid);
 497                 atomic_sub(n, &cpu_ctx->nr_active);
 498
 499                 if (n && atomic_read(&cpu_ctx->nr_active) == 0) {
 500                         cancel_hrtimer(cpu_ctx);
 501                         atomic_dec(&hrt.nr_active_all_core);
 502
 503                         if (ctx->pmu)
 504                                 ctx->pmu->stop();
 505                 }
 506         }
 507 }
 508
 509 void __quadd_event_mmap(struct vm_area_struct *vma)
 510 {
 511         struct quadd_parameters *param;
 512
 513         if (likely(!hrt.active))
 514                 return;
 515
 516         if (!is_profile_process(current))
 517                 return;
 518
 519         param = &hrt.quadd_ctx->param;
 520         quadd_process_mmap(vma, param->pids[0]);
 521 }
 522
 523 static void reset_cpu_ctx(void)
 524 {
 525         int cpu_id;
 526         struct quadd_cpu_context *cpu_ctx;
 527         struct quadd_thread_data *t_data;
 528
 529         for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
 530                 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
 531                 t_data = &cpu_ctx->active_thread;
 532
 533                 atomic_set(&cpu_ctx->nr_active, 0);
 534
 535                 t_data->pid = -1;
 536                 t_data->tgid = -1;
 537         }
 538 }
 539
 540 int quadd_hrt_start(void)
 541 {
 542         int err;
 543         u64 period;
 544         long freq;
 545         unsigned int extra;
 546         struct quadd_ctx *ctx = hrt.quadd_ctx;
 547         struct quadd_parameters *param = &ctx->param;
 548
 549         freq = ctx->param.freq;
 550         freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
 551         period = NSEC_PER_SEC / freq;
 552         hrt.sample_period = period;
 553
 554         if (ctx->param.ma_freq > 0)
 555                 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
 556         else
 557                 hrt.ma_period = 0;
 558
 559         atomic64_set(&hrt.counter_samples, 0);
 560
 561         reset_cpu_ctx();
 562
 563         put_header();
 564
 565         extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
 566
 567         if (extra & QUADD_PARAM_EXTRA_GET_MMAP) {
 568                 err = quadd_get_current_mmap(param->pids[0]);
 569                 if (err) {
 570                         pr_err("error: quadd_get_current_mmap\n");
 571                         return err;
 572                 }
 573         }
 574
 575         if (ctx->pl310)
 576                 ctx->pl310->start();
 577
 578         quadd_ma_start(&hrt);
 579
 580         hrt.active = 1;
 581
 582         pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
 583         return 0;
 584 }
 585
 586 void quadd_hrt_stop(void)
 587 {
 588         struct quadd_ctx *ctx = hrt.quadd_ctx;
 589
 590         pr_info("Stop hrt, number of samples: %llu\n",
 591                 atomic64_read(&hrt.counter_samples));
 592
 593         if (ctx->pl310)
 594                 ctx->pl310->stop();
 595
 596         quadd_ma_stop(&hrt);
 597
 598         hrt.active = 0;
 599
 600         atomic64_set(&hrt.counter_samples, 0);
 601
 602         /* reset_cpu_ctx(); */
 603 }
 604
 605 void quadd_hrt_deinit(void)
 606 {
 607         if (hrt.active)
 608                 quadd_hrt_stop();
 609
 610         free_percpu(hrt.cpu_ctx);
 611 }
 612
 613 void quadd_hrt_get_state(struct quadd_module_state *state)
 614 {
 615         state->nr_all_samples = atomic64_read(&hrt.counter_samples);
 616         state->nr_skipped_samples = 0;
 617 }
 618
 619 struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
 620 {
 621         int cpu_id;
 622         u64 period;
 623         long freq;
 624         struct quadd_cpu_context *cpu_ctx;
 625
 626         hrt.quadd_ctx = ctx;
 627         hrt.active = 0;
 628
 629         freq = ctx->param.freq;
 630         freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
 631         period = NSEC_PER_SEC / freq;
 632         hrt.sample_period = period;
 633
 634         if (ctx->param.ma_freq > 0)
 635                 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
 636         else
 637                 hrt.ma_period = 0;
 638
 639         atomic64_set(&hrt.counter_samples, 0);
 640         hrt.tc = arch_timer_get_timecounter();
 641
 642         hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
 643         if (!hrt.cpu_ctx)
 644                 return ERR_PTR(-ENOMEM);
 645
 646         for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
 647                 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
 648
 649                 atomic_set(&cpu_ctx->nr_active, 0);
 650
 651                 cpu_ctx->active_thread.pid = -1;
 652                 cpu_ctx->active_thread.tgid = -1;
 653
 654                 init_hrtimer(cpu_ctx);
 655         }
 656
 657         return &hrt;
 658 }