drivers/misc/tegra-profiler/hrt.c

   1 /*
   2  * drivers/misc/tegra-profiler/hrt.c
   3  *
   4  * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
   5  *
   6  * This program is free software; you can redistribute it and/or modify it
   7  * under the terms and conditions of the GNU General Public License,
   8  * version 2, as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13  * more details.
  14  *
  15  */
  16
  17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  18
  19 #include <linux/sched.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/slab.h>
  22 #include <linux/cpu.h>
  23 #include <linux/ptrace.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/err.h>
  26 #include <linux/nsproxy.h>
  27
  28 #include <asm/cputype.h>
  29 #include <asm/irq_regs.h>
  30
  31 #include <linux/tegra_profiler.h>
  32
  33 #include "quadd.h"
  34 #include "hrt.h"
  35 #include "comm.h"
  36 #include "mmap.h"
  37 #include "ma.h"
  38 #include "power_clk.h"
  39 #include "tegra.h"
  40 #include "debug.h"
  41
  42 static struct quadd_hrt_ctx hrt;
  43
  44 static void
  45 read_all_sources(struct pt_regs *regs, struct task_struct *task);
  46
  47 struct hrt_event_value {
  48         int event_id;
  49         u32 value;
  50 };
  51
  52 static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
  53 {
  54         struct pt_regs *regs;
  55
  56         regs = get_irq_regs();
  57
  58         if (!hrt.active)
  59                 return HRTIMER_NORESTART;
  60
  61         qm_debug_handler_sample(regs);
  62
  63         if (regs)
  64                 read_all_sources(regs, NULL);
  65
  66         hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
  67         qm_debug_timer_forward(regs, hrt.sample_period);
  68
  69         return HRTIMER_RESTART;
  70 }
  71
  72 static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
  73 {
  74         u64 period = hrt.sample_period;
  75
  76         __hrtimer_start_range_ns(&cpu_ctx->hrtimer,
  77                                  ns_to_ktime(period), 0,
  78                                  HRTIMER_MODE_REL_PINNED, 0);
  79         qm_debug_timer_start(NULL, period);
  80 }
  81
  82 static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
  83 {
  84         hrtimer_cancel(&cpu_ctx->hrtimer);
  85         qm_debug_timer_cancel();
  86 }
  87
  88 static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
  89 {
  90         hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  91         cpu_ctx->hrtimer.function = hrtimer_handler;
  92 }
  93
  94 u64 quadd_get_time(void)
  95 {
  96         struct timespec ts;
  97
  98         do_posix_clock_monotonic_gettime(&ts);
  99         return timespec_to_ns(&ts);
 100 }
 101
 102 static void put_header(void)
 103 {
 104         int nr_events = 0, max_events = QUADD_MAX_COUNTERS;
 105         int events[QUADD_MAX_COUNTERS];
 106         struct quadd_record_data record;
 107         struct quadd_header_data *hdr = &record.hdr;
 108         struct quadd_parameters *param = &hrt.quadd_ctx->param;
 109         unsigned int extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
 110         struct quadd_iovec vec;
 111         struct quadd_ctx *ctx = hrt.quadd_ctx;
 112         struct quadd_event_source_interface *pmu = ctx->pmu;
 113         struct quadd_event_source_interface *pl310 = ctx->pl310;
 114
 115         record.record_type = QUADD_RECORD_TYPE_HEADER;
 116
 117         hdr->magic = QUADD_HEADER_MAGIC;
 118         hdr->version = QUADD_SAMPLES_VERSION;
 119
 120         hdr->backtrace = param->backtrace;
 121         hdr->use_freq = param->use_freq;
 122         hdr->system_wide = param->system_wide;
 123
 124         /* TODO: dynamically */
 125 #ifdef QM_DEBUG_SAMPLES_ENABLE
 126         hdr->debug_samples = 1;
 127 #else
 128         hdr->debug_samples = 0;
 129 #endif
 130
 131         hdr->freq = param->freq;
 132         hdr->ma_freq = param->ma_freq;
 133         hdr->power_rate_freq = param->power_rate_freq;
 134
 135         hdr->power_rate = hdr->power_rate_freq > 0 ? 1 : 0;
 136         hdr->get_mmap = (extra & QUADD_PARAM_EXTRA_GET_MMAP) ? 1 : 0;
 137
 138         hdr->reserved = 0;
 139         hdr->extra_length = 0;
 140
 141         if (pmu)
 142                 nr_events += pmu->get_current_events(events, max_events);
 143
 144         if (pl310)
 145                 nr_events += pl310->get_current_events(events + nr_events,
 146                                                        max_events - nr_events);
 147
 148         hdr->nr_events = nr_events;
 149
 150         vec.base = events;
 151         vec.len = nr_events * sizeof(events[0]);
 152
 153         quadd_put_sample(&record, &vec, 1);
 154 }
 155
 156 void quadd_put_sample(struct quadd_record_data *data,
 157                       struct quadd_iovec *vec, int vec_count)
 158 {
 159         struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
 160
 161         comm->put_sample(data, vec, vec_count);
 162         atomic64_inc(&hrt.counter_samples);
 163 }
 164
 165 static int get_sample_data(struct quadd_sample_data *sample,
 166                            struct pt_regs *regs,
 167                            struct task_struct *task)
 168 {
 169         unsigned int cpu, flags;
 170         struct quadd_ctx *quadd_ctx = hrt.quadd_ctx;
 171
 172         cpu = quadd_get_processor_id(regs, &flags);
 173         sample->cpu = cpu;
 174
 175         sample->lp_mode =
 176                 (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
 177         sample->thumb_mode = (flags & QUADD_CPUMODE_THUMB) ? 1 : 0;
 178         sample->user_mode = user_mode(regs) ? 1 : 0;
 179
 180         /* For security reasons, hide IPs from the kernel space. */
 181         if (!sample->user_mode && !quadd_ctx->collect_kernel_ips)
 182                 sample->ip = 0;
 183         else
 184                 sample->ip = instruction_pointer(regs);
 185
 186         sample->time = quadd_get_time();
 187         sample->reserved = 0;
 188         sample->pid = task->pid;
 189         sample->in_interrupt = in_interrupt() ? 1 : 0;
 190
 191         return 0;
 192 }
 193
 194 static int read_source(struct quadd_event_source_interface *source,
 195                        struct pt_regs *regs,
 196                        struct hrt_event_value *events_vals,
 197                        int max_events)
 198 {
 199         int nr_events, i;
 200         u32 prev_val, val, res_val;
 201         struct event_data events[QUADD_MAX_COUNTERS];
 202
 203         if (!source)
 204                 return 0;
 205
 206         max_events = min_t(int, max_events, QUADD_MAX_COUNTERS);
 207         nr_events = source->read(events, max_events);
 208
 209         for (i = 0; i < nr_events; i++) {
 210                 struct event_data *s = &events[i];
 211
 212                 prev_val = s->prev_val;
 213                 val = s->val;
 214
 215                 if (prev_val <= val)
 216                         res_val = val - prev_val;
 217                 else
 218                         res_val = QUADD_U32_MAX - prev_val + val;
 219
 220                 if (s->event_source == QUADD_EVENT_SOURCE_PL310) {
 221                         int nr_active = atomic_read(&hrt.nr_active_all_core);
 222                         if (nr_active > 1)
 223                                 res_val /= nr_active;
 224                 }
 225
 226                 events_vals[i].event_id = s->event_id;
 227                 events_vals[i].value = res_val;
 228         }
 229
 230         return nr_events;
 231 }
 232
 233 static void
 234 read_all_sources(struct pt_regs *regs, struct task_struct *task)
 235 {
 236         u32 state, extra_data = 0;
 237         int i, vec_idx = 0, bt_size = 0;
 238         int nr_events = 0, nr_positive_events = 0;
 239         struct pt_regs *user_regs;
 240         struct quadd_iovec vec[5];
 241         struct hrt_event_value events[QUADD_MAX_COUNTERS];
 242         u32 events_extra[QUADD_MAX_COUNTERS];
 243
 244         struct quadd_record_data record_data;
 245         struct quadd_sample_data *s = &record_data.sample;
 246
 247         struct quadd_ctx *ctx = hrt.quadd_ctx;
 248         struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 249         struct quadd_callchain *cc = &cpu_ctx->cc;
 250
 251         if (!regs)
 252                 return;
 253
 254         if (atomic_read(&cpu_ctx->nr_active) == 0)
 255                 return;
 256
 257         if (!task)
 258                 task = current;
 259
 260         rcu_read_lock();
 261         if (!task_nsproxy(task)) {
 262                 rcu_read_unlock();
 263                 return;
 264         }
 265         rcu_read_unlock();
 266
 267         if (ctx->pmu && ctx->pmu_info.active)
 268                 nr_events += read_source(ctx->pmu, regs,
 269                                          events, QUADD_MAX_COUNTERS);
 270
 271         if (ctx->pl310 && ctx->pl310_info.active)
 272                 nr_events += read_source(ctx->pl310, regs,
 273                                          events + nr_events,
 274                                          QUADD_MAX_COUNTERS - nr_events);
 275
 276         if (!nr_events)
 277                 return;
 278
 279         if (user_mode(regs))
 280                 user_regs = regs;
 281         else
 282                 user_regs = current_pt_regs();
 283
 284         if (get_sample_data(s, regs, task))
 285                 return;
 286
 287         if (cc->cs_64)
 288                 extra_data |= QUADD_SED_IP64;
 289
 290         vec[vec_idx].base = &extra_data;
 291         vec[vec_idx].len = sizeof(extra_data);
 292         vec_idx++;
 293
 294         s->reserved = 0;
 295         cc->unw_method = QUADD_URC_SUCCESS;
 296
 297         if (ctx->param.backtrace) {
 298                 bt_size = quadd_get_user_callchain(user_regs, cc, ctx, task);
 299
 300                 if (!bt_size && !user_mode(regs)) {
 301                         unsigned long pc = instruction_pointer(user_regs);
 302
 303                         cc->nr = 0;
 304 #ifdef CONFIG_ARM64
 305                         cc->cs_64 = compat_user_mode(user_regs) ? 0 : 1;
 306 #else
 307                         cc->cs_64 = 0;
 308 #endif
 309                         bt_size += quadd_callchain_store(cc, pc,
 310                                                          QUADD_UNW_TYPE_KCTX);
 311                 }
 312
 313                 if (bt_size > 0) {
 314                         int ip_size = cc->cs_64 ? sizeof(u64) : sizeof(u32);
 315                         int nr_types = DIV_ROUND_UP(bt_size, 8);
 316
 317                         vec[vec_idx].base = cc->cs_64 ?
 318                                 (void *)cc->ip_64 : (void *)cc->ip_32;
 319                         vec[vec_idx].len = bt_size * ip_size;
 320                         vec_idx++;
 321
 322                         vec[vec_idx].base = cc->types;
 323                         vec[vec_idx].len = nr_types * sizeof(cc->types[0]);
 324                         vec_idx++;
 325                 }
 326
 327                 extra_data |= cc->unw_method << QUADD_SED_UNW_METHOD_SHIFT;
 328                 s->reserved |= cc->unw_rc << QUADD_SAMPLE_URC_SHIFT;
 329         }
 330         s->callchain_nr = bt_size;
 331
 332         record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
 333
 334         s->events_flags = 0;
 335         for (i = 0; i < nr_events; i++) {
 336                 u32 value = events[i].value;
 337                 if (value > 0) {
 338                         s->events_flags |= 1 << i;
 339                         events_extra[nr_positive_events++] = value;
 340                 }
 341         }
 342
 343         if (nr_positive_events == 0)
 344                 return;
 345
 346         vec[vec_idx].base = events_extra;
 347         vec[vec_idx].len = nr_positive_events * sizeof(events_extra[0]);
 348         vec_idx++;
 349
 350         state = task->state;
 351         if (state) {
 352                 s->state = 1;
 353                 vec[vec_idx].base = &state;
 354                 vec[vec_idx].len = sizeof(state);
 355                 vec_idx++;
 356         } else {
 357                 s->state = 0;
 358         }
 359
 360         quadd_put_sample(&record_data, vec, vec_idx);
 361 }
 362
 363 static inline int
 364 is_profile_process(struct task_struct *task)
 365 {
 366         int i;
 367         pid_t pid, profile_pid;
 368         struct quadd_ctx *ctx = hrt.quadd_ctx;
 369
 370         if (!task)
 371                 return 0;
 372
 373         pid = task->tgid;
 374
 375         for (i = 0; i < ctx->param.nr_pids; i++) {
 376                 profile_pid = ctx->param.pids[i];
 377                 if (profile_pid == pid)
 378                         return 1;
 379         }
 380         return 0;
 381 }
 382
 383 static int
 384 add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
 385 {
 386         struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
 387
 388         if (t_data->pid > 0 ||
 389                 atomic_read(&cpu_ctx->nr_active) > 0) {
 390                 pr_warn_once("Warning for thread: %d\n", (int)pid);
 391                 return 0;
 392         }
 393
 394         t_data->pid = pid;
 395         t_data->tgid = tgid;
 396         return 1;
 397 }
 398
 399 static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
 400 {
 401         struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
 402
 403         if (t_data->pid < 0)
 404                 return 0;
 405
 406         if (t_data->pid == pid) {
 407                 t_data->pid = -1;
 408                 t_data->tgid = -1;
 409                 return 1;
 410         }
 411
 412         pr_warn_once("Warning for thread: %d\n", (int)pid);
 413         return 0;
 414 }
 415
 416 void __quadd_task_sched_in(struct task_struct *prev,
 417                            struct task_struct *task)
 418 {
 419         struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 420         struct quadd_ctx *ctx = hrt.quadd_ctx;
 421         struct event_data events[QUADD_MAX_COUNTERS];
 422         /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
 423
 424         if (likely(!hrt.active))
 425                 return;
 426 /*
 427         if (__ratelimit(&ratelimit_state))
 428                 pr_info("sch_in, cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n",
 429                         smp_processor_id(), (unsigned int)prev->pid,
 430                         (unsigned int)prev->tgid, (unsigned int)task->pid,
 431                         (unsigned int)task->tgid);
 432 */
 433
 434         if (is_profile_process(task)) {
 435                 add_active_thread(cpu_ctx, task->pid, task->tgid);
 436                 atomic_inc(&cpu_ctx->nr_active);
 437
 438                 if (atomic_read(&cpu_ctx->nr_active) == 1) {
 439                         if (ctx->pmu)
 440                                 ctx->pmu->start();
 441
 442                         if (ctx->pl310)
 443                                 ctx->pl310->read(events, 1);
 444
 445                         start_hrtimer(cpu_ctx);
 446                         atomic_inc(&hrt.nr_active_all_core);
 447                 }
 448         }
 449 }
 450
 451 void __quadd_task_sched_out(struct task_struct *prev,
 452                             struct task_struct *next)
 453 {
 454         int n;
 455         struct pt_regs *user_regs;
 456         struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 457         struct quadd_ctx *ctx = hrt.quadd_ctx;
 458         /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
 459
 460         if (likely(!hrt.active))
 461                 return;
 462 /*
 463         if (__ratelimit(&ratelimit_state))
 464                 pr_info("sch_out: cpu: %d, prev: %u (%u) \t--> next: %u (%u)\n",
 465                         smp_processor_id(), (unsigned int)prev->pid,
 466                         (unsigned int)prev->tgid, (unsigned int)next->pid,
 467                         (unsigned int)next->tgid);
 468 */
 469
 470         if (is_profile_process(prev)) {
 471                 user_regs = task_pt_regs(prev);
 472                 if (user_regs)
 473                         read_all_sources(user_regs, prev);
 474
 475                 n = remove_active_thread(cpu_ctx, prev->pid);
 476                 atomic_sub(n, &cpu_ctx->nr_active);
 477
 478                 if (n && atomic_read(&cpu_ctx->nr_active) == 0) {
 479                         cancel_hrtimer(cpu_ctx);
 480                         atomic_dec(&hrt.nr_active_all_core);
 481
 482                         if (ctx->pmu)
 483                                 ctx->pmu->stop();
 484                 }
 485         }
 486 }
 487
 488 void __quadd_event_mmap(struct vm_area_struct *vma)
 489 {
 490         struct quadd_parameters *param;
 491
 492         if (likely(!hrt.active))
 493                 return;
 494
 495         if (!is_profile_process(current))
 496                 return;
 497
 498         param = &hrt.quadd_ctx->param;
 499         quadd_process_mmap(vma, param->pids[0]);
 500 }
 501
 502 static void reset_cpu_ctx(void)
 503 {
 504         int cpu_id;
 505         struct quadd_cpu_context *cpu_ctx;
 506         struct quadd_thread_data *t_data;
 507
 508         for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
 509                 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
 510                 t_data = &cpu_ctx->active_thread;
 511
 512                 atomic_set(&cpu_ctx->nr_active, 0);
 513
 514                 t_data->pid = -1;
 515                 t_data->tgid = -1;
 516         }
 517 }
 518
 519 int quadd_hrt_start(void)
 520 {
 521         int err;
 522         u64 period;
 523         long freq;
 524         unsigned int extra;
 525         struct quadd_ctx *ctx = hrt.quadd_ctx;
 526         struct quadd_parameters *param = &ctx->param;
 527
 528         freq = ctx->param.freq;
 529         freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
 530         period = NSEC_PER_SEC / freq;
 531         hrt.sample_period = period;
 532
 533         if (ctx->param.ma_freq > 0)
 534                 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
 535         else
 536                 hrt.ma_period = 0;
 537
 538         atomic64_set(&hrt.counter_samples, 0);
 539
 540         reset_cpu_ctx();
 541
 542         put_header();
 543
 544         extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
 545
 546         if (extra & QUADD_PARAM_EXTRA_GET_MMAP) {
 547                 err = quadd_get_current_mmap(param->pids[0]);
 548                 if (err) {
 549                         pr_err("error: quadd_get_current_mmap\n");
 550                         return err;
 551                 }
 552         }
 553
 554         if (ctx->pl310)
 555                 ctx->pl310->start();
 556
 557         quadd_ma_start(&hrt);
 558
 559         hrt.active = 1;
 560
 561         pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
 562         return 0;
 563 }
 564
 565 void quadd_hrt_stop(void)
 566 {
 567         struct quadd_ctx *ctx = hrt.quadd_ctx;
 568
 569         pr_info("Stop hrt, number of samples: %llu\n",
 570                 atomic64_read(&hrt.counter_samples));
 571
 572         if (ctx->pl310)
 573                 ctx->pl310->stop();
 574
 575         quadd_ma_stop(&hrt);
 576
 577         hrt.active = 0;
 578
 579         atomic64_set(&hrt.counter_samples, 0);
 580
 581         /* reset_cpu_ctx(); */
 582 }
 583
 584 void quadd_hrt_deinit(void)
 585 {
 586         if (hrt.active)
 587                 quadd_hrt_stop();
 588
 589         free_percpu(hrt.cpu_ctx);
 590 }
 591
 592 void quadd_hrt_get_state(struct quadd_module_state *state)
 593 {
 594         state->nr_all_samples = atomic64_read(&hrt.counter_samples);
 595         state->nr_skipped_samples = 0;
 596 }
 597
 598 struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
 599 {
 600         int cpu_id;
 601         u64 period;
 602         long freq;
 603         struct quadd_cpu_context *cpu_ctx;
 604
 605         hrt.quadd_ctx = ctx;
 606         hrt.active = 0;
 607
 608         freq = ctx->param.freq;
 609         freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
 610         period = NSEC_PER_SEC / freq;
 611         hrt.sample_period = period;
 612
 613         if (ctx->param.ma_freq > 0)
 614                 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
 615         else
 616                 hrt.ma_period = 0;
 617
 618         atomic64_set(&hrt.counter_samples, 0);
 619
 620         hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
 621         if (!hrt.cpu_ctx)
 622                 return ERR_PTR(-ENOMEM);
 623
 624         for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
 625                 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
 626
 627                 atomic_set(&cpu_ctx->nr_active, 0);
 628
 629                 cpu_ctx->active_thread.pid = -1;
 630                 cpu_ctx->active_thread.tgid = -1;
 631
 632                 init_hrtimer(cpu_ctx);
 633         }
 634
 635         return &hrt;
 636 }