2 * drivers/misc/tegra-profiler/hrt.c
4 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 #include <linux/sched.h>
20 #include <linux/hrtimer.h>
21 #include <linux/slab.h>
22 #include <linux/cpu.h>
23 #include <linux/ptrace.h>
24 #include <linux/interrupt.h>
25 #include <linux/err.h>
26 #include <clocksource/arm_arch_timer.h>
28 #include <asm/cputype.h>
29 #include <asm/irq_regs.h>
30 #include <asm/arch_timer.h>
32 #include <linux/tegra_profiler.h>
39 #include "power_clk.h"
/* Single module-wide context for the hrtimer-based sampling engine. */
43 static struct quadd_hrt_ctx hrt;

/* Forward declaration: sample every active event source for @task. */
46 read_all_sources(struct pt_regs *regs, struct task_struct *task);

/* One counter reading (members not visible in this chunk of the file). */
48 struct hrt_event_value {
/*
 * Fold a task's run state and exit state into one u32 by OR-ing the
 * two bitmasks; recorded verbatim into sched/sample records.
 */
53 static inline u32 get_task_state(struct task_struct *task)
55 return (u32)(task->state | task->exit_state);
/*
 * Per-CPU hrtimer callback: take one profiling sample of the current
 * task, then re-arm the timer for the next sampling period.  Returns
 * HRTIMER_NORESTART once profiling has been switched off.
 */
58 static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
62 regs = get_irq_regs();
/* Profiling stopped: let the timer die instead of re-arming it. */
64 if (!atomic_read(&hrt.active))
65 return HRTIMER_NORESTART;
67 qm_debug_handler_sample(regs);
70 read_all_sources(regs, current);
/* Advance the expiry by one sample period from now. */
72 hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
73 qm_debug_timer_forward(regs, hrt.sample_period);
75 return HRTIMER_RESTART;
78 static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
80 u64 period = hrt.sample_period;
82 __hrtimer_start_range_ns(&cpu_ctx->hrtimer,
83 ns_to_ktime(period), 0,
84 HRTIMER_MODE_REL_PINNED, 0);
85 qm_debug_timer_start(NULL, period);
/* Stop this CPU's sampling timer (waits for a running handler). */
88 static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
90 hrtimer_cancel(&cpu_ctx->hrtimer);
91 qm_debug_timer_cancel();
/* One-time setup of the per-CPU timer; callback is hrtimer_handler(). */
94 static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
96 hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
97 cpu_ctx->hrtimer.function = hrtimer_handler;
/* Current CLOCK_MONOTONIC time, converted to nanoseconds. */
100 static inline u64 get_posix_clock_monotonic_time(void)
104 do_posix_clock_monotonic_gettime(&ts);
105 return timespec_to_ns(&ts);
/*
 * Read the raw cycle counter behind @tc and convert the cycle count
 * to nanoseconds using the counter's own cyc2ns parameters.
 */
108 static inline u64 get_arch_time(struct timecounter *tc)
111 const struct cyclecounter *cc = tc->cc;
113 value = cc->read(cc);
114 return cyclecounter_cyc2ns(cc, value);
/*
 * Timestamp source for all profiler records: the arch timer when it is
 * available and selected, otherwise the monotonic POSIX clock.
 */
117 u64 quadd_get_time(void)
119 struct timecounter *tc = hrt.tc;
121 return (tc && hrt.use_arch_timer) ?
123 get_posix_clock_monotonic_time();
/*
 * Push one record plus its iovec payload through the comm interface.
 * Bumps skipped_samples on failure, counter_samples on success.
 * cpu_id of -1 means "this CPU" (see quadd_put_sample_this_cpu()).
 */
127 __put_sample(struct quadd_record_data *data,
128 struct quadd_iovec *vec,
129 int vec_count, int cpu_id)
132 struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
134 err = comm->put_sample(data, vec, vec_count, cpu_id);
136 atomic64_inc(&hrt.skipped_samples);
138 atomic64_inc(&hrt.counter_samples);
/* Emit a record attributed to the current CPU (cpu_id == -1). */
142 quadd_put_sample_this_cpu(struct quadd_record_data *data,
143 struct quadd_iovec *vec, int vec_count)
145 __put_sample(data, vec, vec_count, -1);
/* Emit a record on the default channel (cpu_id == 0). */
149 quadd_put_sample(struct quadd_record_data *data,
150 struct quadd_iovec *vec, int vec_count)
152 __put_sample(data, vec, vec_count, 0);
/*
 * Build and emit the per-CPU stream header record: magic/version,
 * the session parameters, capability flags in hdr->reserved, and the
 * list of currently-configured event ids for this CPU.
 */
155 static void put_header(int cpuid)
157 int nr_events = 0, max_events = QUADD_MAX_COUNTERS;
158 int events[QUADD_MAX_COUNTERS];
159 struct quadd_record_data record;
160 struct quadd_header_data *hdr = &record.hdr;
161 struct quadd_parameters *param = &hrt.quadd_ctx->param;
162 unsigned int extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
163 struct quadd_iovec vec[2];
164 struct quadd_ctx *ctx = hrt.quadd_ctx;
165 struct quadd_event_source_interface *pmu = ctx->pmu;
166 struct quadd_event_source_interface *pl310 = ctx->pl310;
167 u32 cpuid_data = cpuid;
169 record.record_type = QUADD_RECORD_TYPE_HEADER;
171 hdr->magic = QUADD_HEADER_MAGIC;
172 hdr->version = QUADD_SAMPLES_VERSION;
/* Copy session-wide options straight from the user parameters. */
174 hdr->backtrace = param->backtrace;
175 hdr->use_freq = param->use_freq;
176 hdr->system_wide = param->system_wide;
178 /* TODO: dynamically */
179 #ifdef QM_DEBUG_SAMPLES_ENABLE
180 hdr->debug_samples = 1;
182 hdr->debug_samples = 0;
185 hdr->freq = param->freq;
186 hdr->ma_freq = param->ma_freq;
187 hdr->power_rate_freq = param->power_rate_freq;
189 hdr->power_rate = hdr->power_rate_freq > 0 ? 1 : 0;
190 hdr->get_mmap = (extra & QUADD_PARAM_EXTRA_GET_MMAP) ? 1 : 0;
193 hdr->extra_length = 0;
/* Advertise which unwinding methods this session will use. */
195 if (hdr->backtrace) {
196 struct quadd_unw_methods *um = &hrt.um;
198 hdr->reserved |= um->fp ? QUADD_HDR_BT_FP : 0;
199 hdr->reserved |= um->ut ? QUADD_HDR_BT_UT : 0;
200 hdr->reserved |= um->ut_ce ? QUADD_HDR_BT_UT_CE : 0;
201 hdr->reserved |= um->dwarf ? QUADD_HDR_BT_DWARF : 0;
204 if (hrt.use_arch_timer)
205 hdr->reserved |= QUADD_HDR_USE_ARCH_TIMER;
207 if (hrt.get_stack_offset)
208 hdr->reserved |= QUADD_HDR_STACK_OFFSET;
210 hdr->reserved |= QUADD_HDR_HAS_CPUID;
/* Collect the configured event ids from the PMU, then PL310. */
213 nr_events += pmu->get_current_events(cpuid, events + nr_events,
214 max_events - nr_events);
217 nr_events += pl310->get_current_events(cpuid,
219 max_events - nr_events);
221 hdr->nr_events = nr_events;
/* Payload: vec[0] = event id list, vec[1] = cpuid trailer. */
223 vec[0].base = events;
224 vec[0].len = nr_events * sizeof(events[0]);
226 vec[1].base = &cpuid_data;
227 vec[1].len = sizeof(cpuid_data);
229 __put_sample(&record, &vec[0], 2, cpuid);
/*
 * Emit a scheduler event record for @task: direction (in/out),
 * timestamp, tgid, LP-cluster flag and packed task state.
 */
233 put_sched_sample(struct task_struct *task, int is_sched_in)
235 unsigned int cpu, flags;
236 struct quadd_record_data record;
237 struct quadd_sched_data *s = &record.sched;
239 record.record_type = QUADD_RECORD_TYPE_SCHED;
241 cpu = quadd_get_processor_id(NULL, &flags);
/* Mark whether this CPU is in the Tegra low-power cluster. */
243 s->lp_mode = (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
245 s->sched_in = is_sched_in ? 1 : 0;
246 s->time = quadd_get_time();
248 s->tgid = task->tgid;
252 s->data[QUADD_SCHED_IDX_TASK_STATE] = get_task_state(task);
253 s->data[QUADD_SCHED_IDX_RESERVED] = 0;
255 quadd_put_sample_this_cpu(&record, NULL, 0);
/*
 * Fill the fixed part of a sample record from @regs/@task: CPU mode
 * flags, instruction pointer, timestamp, pid/tgid, interrupt context.
 */
258 static int get_sample_data(struct quadd_sample_data *sample,
259 struct pt_regs *regs,
260 struct task_struct *task)
262 unsigned int cpu, flags;
263 struct quadd_ctx *quadd_ctx = hrt.quadd_ctx;
265 cpu = quadd_get_processor_id(regs, &flags);
269 (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
270 sample->thumb_mode = (flags & QUADD_CPUMODE_THUMB) ? 1 : 0;
271 sample->user_mode = user_mode(regs) ? 1 : 0;
273 /* For security reasons, hide IPs from the kernel space. */
274 if (!sample->user_mode && !quadd_ctx->collect_kernel_ips)
277 sample->ip = instruction_pointer(regs);
279 sample->time = quadd_get_time();
280 sample->reserved = 0;
281 sample->pid = task->pid;
282 sample->tgid = task->tgid;
283 sample->in_interrupt = in_interrupt() ? 1 : 0;
/*
 * Read up to @max_events counters from @source and convert each raw
 * reading into a delta since the previous reading, handling 32-bit
 * counter wraparound.  PL310 counters are L2-wide, so their delta is
 * divided by the number of CPUs currently being sampled.
 */
288 static int read_source(struct quadd_event_source_interface *source,
289 struct pt_regs *regs,
290 struct hrt_event_value *events_vals,
294 u32 prev_val, val, res_val;
295 struct event_data events[QUADD_MAX_COUNTERS];
300 max_events = min_t(int, max_events, QUADD_MAX_COUNTERS);
301 nr_events = source->read(events, max_events);
303 for (i = 0; i < nr_events; i++) {
304 struct event_data *s = &events[i];
306 prev_val = s->prev_val;
/* Delta since last read; second form handles counter wraparound. */
310 res_val = val - prev_val;
312 res_val = QUADD_U32_MAX - prev_val + val;
/* Shared L2 counter: apportion the delta across active cores. */
314 if (s->event_source == QUADD_EVENT_SOURCE_PL310) {
315 int nr_active = atomic_read(&hrt.nr_active_all_core);
318 res_val /= nr_active;
321 events_vals[i].event_id = s->event_id;
322 events_vals[i].value = res_val;
/*
 * Distance in bytes from the task's current user stack pointer (or the
 * deepest SP seen by the unwinder, if a callchain was produced) to the
 * end of the VMA containing it.
 */
329 get_stack_offset(struct task_struct *task,
330 struct pt_regs *regs,
331 struct quadd_callchain *cc)
334 struct vm_area_struct *vma;
335 struct mm_struct *mm = task->mm;
/* Prefer the unwinder's last SP when a backtrace exists. */
340 sp = cc->nr > 0 ? cc->curr_sp :
341 quadd_user_stack_pointer(regs);
343 vma = find_vma(mm, sp);
347 return vma->vm_end - sp;
/*
 * Core sampling routine: read every active event source (PMU, PL310),
 * optionally unwind the user callchain, then assemble and emit one
 * sample record with a variable iovec payload (extra-data word,
 * callchain IPs + unwind-type bytes, unwind result codes, positive
 * counter values, task state).
 */
351 read_all_sources(struct pt_regs *regs, struct task_struct *task)
353 u32 state, extra_data = 0, urcs = 0;
354 int i, vec_idx = 0, bt_size = 0;
355 int nr_events = 0, nr_positive_events = 0;
356 struct pt_regs *user_regs;
357 struct quadd_iovec vec[6];
358 struct hrt_event_value events[QUADD_MAX_COUNTERS];
359 u32 events_extra[QUADD_MAX_COUNTERS];
361 struct quadd_record_data record_data;
362 struct quadd_sample_data *s = &record_data.sample;
364 struct quadd_ctx *ctx = hrt.quadd_ctx;
365 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
366 struct quadd_callchain *cc = &cpu_ctx->cc;
/* Nothing to sample on this CPU, or the task is going away. */
368 if (atomic_read(&cpu_ctx->nr_active) == 0)
371 if (task->flags & PF_EXITING)
/* Gather counter deltas from each enabled source. */
374 if (ctx->pmu && ctx->get_pmu_info()->active)
375 nr_events += read_source(ctx->pmu, regs,
376 events, QUADD_MAX_COUNTERS);
378 if (ctx->pl310 && ctx->pl310_info.active)
379 nr_events += read_source(ctx->pl310, regs,
381 QUADD_MAX_COUNTERS - nr_events);
389 user_regs = current_pt_regs();
391 if (get_sample_data(s, regs, task))
/* vec[0]: extra-data word (filled in below, appended by index). */
394 vec[vec_idx].base = &extra_data;
395 vec[vec_idx].len = sizeof(extra_data);
406 if (ctx->param.backtrace) {
409 bt_size = quadd_get_user_callchain(user_regs, cc, ctx, task);
/* Unwind failed from kernel context: fall back to the user PC. */
411 if (!bt_size && !user_mode(regs)) {
412 unsigned long pc = instruction_pointer(user_regs);
416 cc->cs_64 = compat_user_mode(user_regs) ? 0 : 1;
420 bt_size += quadd_callchain_store(cc, pc,
421 QUADD_UNW_TYPE_KCTX);
/* Append callchain IPs (32- or 64-bit) and packed unwind types. */
425 int ip_size = cc->cs_64 ? sizeof(u64) : sizeof(u32);
426 int nr_types = DIV_ROUND_UP(bt_size, 8);
428 vec[vec_idx].base = cc->cs_64 ?
429 (void *)cc->ip_64 : (void *)cc->ip_32;
430 vec[vec_idx].len = bt_size * ip_size;
433 vec[vec_idx].base = cc->types;
434 vec[vec_idx].len = nr_types * sizeof(cc->types[0]);
438 extra_data |= QUADD_SED_IP64;
/* Pack per-method unwind result codes into one word. */
441 urcs |= (cc->urc_fp & QUADD_SAMPLE_URC_MASK) <<
442 QUADD_SAMPLE_URC_SHIFT_FP;
443 urcs |= (cc->urc_ut & QUADD_SAMPLE_URC_MASK) <<
444 QUADD_SAMPLE_URC_SHIFT_UT;
445 urcs |= (cc->urc_dwarf & QUADD_SAMPLE_URC_MASK) <<
446 QUADD_SAMPLE_URC_SHIFT_DWARF;
448 s->reserved |= QUADD_SAMPLE_RES_URCS_ENABLED;
450 vec[vec_idx].base = &urcs;
451 vec[vec_idx].len = sizeof(urcs);
454 s->callchain_nr = bt_size;
/* Optionally encode stack depth (in words, capped) into extra_data. */
456 if (hrt.get_stack_offset) {
457 long offset = get_stack_offset(task, user_regs, cc);
460 u32 off = offset >> 2;
462 off = min_t(u32, off, 0xffff);
463 extra_data |= off << QUADD_SED_STACK_OFFSET_SHIFT;
467 record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
/* Keep only non-zero counter values; flag their slots in events_flags. */
470 for (i = 0; i < nr_events; i++) {
471 u32 value = events[i].value;
474 s->events_flags |= 1 << i;
475 events_extra[nr_positive_events++] = value;
479 if (nr_positive_events == 0)
482 vec[vec_idx].base = events_extra;
483 vec[vec_idx].len = nr_positive_events * sizeof(events_extra[0]);
486 state = get_task_state(task);
489 vec[vec_idx].base = &state;
490 vec[vec_idx].len = sizeof(state);
496 quadd_put_sample_this_cpu(&record_data, vec, vec_idx);
/*
 * True when @task belongs to the set of pids this session was asked
 * to profile (linear scan over ctx->param.pids).
 */
500 is_sample_process(struct task_struct *task)
503 pid_t pid, profile_pid;
504 struct quadd_ctx *ctx = hrt.quadd_ctx;
511 for (i = 0; i < ctx->param.nr_pids; i++) {
512 profile_pid = ctx->param.pids[i];
513 if (profile_pid == pid)
/* Predicate: is @task the idle/swapper task? (body not visible here) */
520 is_swapper_task(struct task_struct *task)
/*
 * Should sched events for @task be traced?  Swapper is excluded;
 * otherwise either everything is traced (trace_all_tasks) or only
 * the profiled pids.
 */
529 is_trace_process(struct task_struct *task)
531 struct quadd_ctx *ctx = hrt.quadd_ctx;
536 if (is_swapper_task(task))
539 if (ctx->param.trace_all_tasks)
542 return is_sample_process(task);
/*
 * Record @pid/@tgid as the single thread being sampled on this CPU.
 * Warns (once) if a thread is already registered here.
 */
546 add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
548 struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
550 if (t_data->pid > 0 ||
551 atomic_read(&cpu_ctx->nr_active) > 0) {
552 pr_warn_once("Warning for thread: %d\n", (int)pid);
/*
 * Clear this CPU's active-thread slot if it matches @pid; warns (once)
 * on mismatch.  Return value feeds atomic_sub() on nr_active.
 */
561 static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
563 struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
568 if (t_data->pid == pid) {
574 pr_warn_once("Warning for thread: %d\n", (int)pid);
/*
 * Scheduler hook, switch-in side.  Emits a sched record for traced
 * tasks; for profiled tasks, registers the thread on this CPU and, on
 * the first active thread, primes the PL310 counter and starts the
 * per-CPU sampling timer.
 */
578 void __quadd_task_sched_in(struct task_struct *prev,
579 struct task_struct *task)
581 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
582 struct quadd_ctx *ctx = hrt.quadd_ctx;
583 struct event_data events[QUADD_MAX_COUNTERS];
584 /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
/* Fast path: profiling off. */
586 if (likely(!atomic_read(&hrt.active)))
589 if (__ratelimit(&ratelimit_state))
590 pr_info("sch_in, cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n",
591 smp_processor_id(), (unsigned int)prev->pid,
592 (unsigned int)prev->tgid, (unsigned int)task->pid,
593 (unsigned int)task->tgid);
596 if (is_trace_process(task))
597 put_sched_sample(task, 1);
599 if (is_sample_process(task)) {
600 add_active_thread(cpu_ctx, task->pid, task->tgid);
601 atomic_inc(&cpu_ctx->nr_active);
/* First active thread on this CPU: begin sampling. */
603 if (atomic_read(&cpu_ctx->nr_active) == 1) {
608 ctx->pl310->read(events, 1);
610 start_hrtimer(cpu_ctx);
611 atomic_inc(&hrt.nr_active_all_core);
/*
 * Scheduler hook, switch-out side.  For a profiled @prev, takes one
 * final sample from its saved user registers, deregisters the thread,
 * and stops the per-CPU timer once no profiled thread remains; also
 * emits a sched-out record for traced tasks.
 */
616 void __quadd_task_sched_out(struct task_struct *prev,
617 struct task_struct *next)
620 struct pt_regs *user_regs;
621 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
622 struct quadd_ctx *ctx = hrt.quadd_ctx;
623 /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
/* Fast path: profiling off. */
625 if (likely(!atomic_read(&hrt.active)))
628 if (__ratelimit(&ratelimit_state))
629 pr_info("sch_out: cpu: %d, prev: %u (%u) \t--> next: %u (%u)\n",
630 smp_processor_id(), (unsigned int)prev->pid,
631 (unsigned int)prev->tgid, (unsigned int)next->pid,
632 (unsigned int)next->tgid);
635 if (is_sample_process(prev)) {
636 user_regs = task_pt_regs(prev);
638 read_all_sources(user_regs, prev);
640 n = remove_active_thread(cpu_ctx, prev->pid);
641 atomic_sub(n, &cpu_ctx->nr_active);
/* Last profiled thread left this CPU: stop sampling. */
643 if (n && atomic_read(&cpu_ctx->nr_active) == 0) {
644 cancel_hrtimer(cpu_ctx);
645 atomic_dec(&hrt.nr_active_all_core);
652 if (is_trace_process(prev))
653 put_sched_sample(prev, 0);
/*
 * mmap hook: record new mappings of a profiled process so IPs can be
 * resolved later.  No-op when profiling is off or the caller is not a
 * profiled task.
 */
656 void __quadd_event_mmap(struct vm_area_struct *vma)
658 struct quadd_parameters *param;
660 if (likely(!atomic_read(&hrt.active)))
663 if (!is_sample_process(current))
666 param = &hrt.quadd_ctx->param;
667 quadd_process_mmap(vma, param->pids[0]);
/* Reset every CPU's context: active counter and thread bookkeeping. */
670 static void reset_cpu_ctx(void)
673 struct quadd_cpu_context *cpu_ctx;
674 struct quadd_thread_data *t_data;
676 for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
677 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
678 t_data = &cpu_ctx->active_thread;
680 atomic_set(&cpu_ctx->nr_active, 0);
/*
 * Begin a profiling session: derive the sampling period from the
 * requested frequency (clamped to QUADD_HRT_MIN_FREQ), reset sample
 * counters, configure unwind methods and timer source from the extra
 * parameter bits, emit per-CPU headers, optionally snapshot existing
 * mmaps, start the moving-average worker, and finally mark the
 * session active.
 */
687 int quadd_hrt_start(void)
694 struct quadd_ctx *ctx = hrt.quadd_ctx;
695 struct quadd_parameters *param = &ctx->param;
/* period (ns) = 1s / freq, with freq bounded below. */
697 freq = ctx->param.freq;
698 freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
699 period = NSEC_PER_SEC / freq;
700 hrt.sample_period = period;
702 if (ctx->param.ma_freq > 0)
703 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
707 atomic64_set(&hrt.counter_samples, 0);
708 atomic64_set(&hrt.skipped_samples, 0);
712 extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
/* Decode which unwinding methods the client requested. */
714 if (param->backtrace) {
715 struct quadd_unw_methods *um = &hrt.um;
717 um->fp = extra & QUADD_PARAM_EXTRA_BT_FP ? 1 : 0;
718 um->ut = extra & QUADD_PARAM_EXTRA_BT_UT ? 1 : 0;
719 um->ut_ce = extra & QUADD_PARAM_EXTRA_BT_UT_CE ? 1 : 0;
720 um->dwarf = extra & QUADD_PARAM_EXTRA_BT_DWARF ? 1 : 0;
722 pr_info("unw methods: fp/ut/ut_ce/dwarf: %u/%u/%u/%u\n",
723 um->fp, um->ut, um->ut_ce, um->dwarf);
/* Arch timer only if available (hrt.tc) and requested. */
726 if (hrt.tc && (extra & QUADD_PARAM_EXTRA_USE_ARCH_TIMER))
727 hrt.use_arch_timer = 1;
729 hrt.use_arch_timer = 0;
731 pr_info("timer: %s\n", hrt.use_arch_timer ? "arch" : "monotonic clock");
733 hrt.get_stack_offset =
734 (extra & QUADD_PARAM_EXTRA_STACK_OFFSET) ? 1 : 0;
736 for_each_possible_cpu(cpuid)
739 if (extra & QUADD_PARAM_EXTRA_GET_MMAP) {
740 err = quadd_get_current_mmap(param->pids[0]);
742 pr_err("error: quadd_get_current_mmap\n");
750 quadd_ma_start(&hrt);
752 atomic_set(&hrt.active, 1);
754 pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
/*
 * Stop the profiling session: log totals, clear the active flag
 * (timers then self-cancel via hrtimer_handler), and zero counters.
 */
758 void quadd_hrt_stop(void)
760 struct quadd_ctx *ctx = hrt.quadd_ctx;
762 pr_info("Stop hrt, samples all/skipped: %llu/%llu\n",
763 atomic64_read(&hrt.counter_samples),
764 atomic64_read(&hrt.skipped_samples));
771 atomic_set(&hrt.active, 0);
773 atomic64_set(&hrt.counter_samples, 0);
774 atomic64_set(&hrt.skipped_samples, 0);
776 /* reset_cpu_ctx(); */
/* Module teardown: release the per-CPU contexts (stop first if active). */
779 void quadd_hrt_deinit(void)
781 if (atomic_read(&hrt.active))
784 free_percpu(hrt.cpu_ctx);
/* Export the total/skipped sample counters to @state. */
787 void quadd_hrt_get_state(struct quadd_module_state *state)
789 state->nr_all_samples = atomic64_read(&hrt.counter_samples);
790 state->nr_skipped_samples = atomic64_read(&hrt.skipped_samples);
/*
 * Adopt the arch timer's timecounter only when userspace access to the
 * virtual counter is enabled in CNTKCTL; otherwise hrt.tc stays unset
 * and quadd_get_time() falls back to the monotonic clock.
 */
793 static void init_arch_timer(void)
795 u32 cntkctl = arch_timer_get_cntkctl();
797 if (cntkctl & ARCH_TIMER_USR_VCT_ACCESS_EN)
798 hrt.tc = arch_timer_get_timecounter();
/*
 * One-time module init: compute the initial sampling period, allocate
 * the per-CPU contexts and initialize each CPU's timer/thread state.
 * Returns ERR_PTR(-ENOMEM) on allocation failure.
 * NOTE(review): the tail of this function is beyond this chunk.
 */
803 struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
808 struct quadd_cpu_context *cpu_ctx;
811 atomic_set(&hrt.active, 0);
/* Same period derivation as quadd_hrt_start(). */
813 freq = ctx->param.freq;
814 freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
815 period = NSEC_PER_SEC / freq;
816 hrt.sample_period = period;
818 if (ctx->param.ma_freq > 0)
819 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
823 atomic64_set(&hrt.counter_samples, 0);
826 hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
828 return ERR_PTR(-ENOMEM);
830 for_each_possible_cpu(cpu_id) {
831 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
833 atomic_set(&cpu_ctx->nr_active, 0);
/* -1 marks the active-thread slot as empty. */
835 cpu_ctx->active_thread.pid = -1;
836 cpu_ctx->active_thread.tgid = -1;
838 cpu_ctx->cc.hrt = &hrt;
840 init_hrtimer(cpu_ctx);