drivers/misc/tegra-profiler/hrt.c

   1 /*
   2  * drivers/misc/tegra-profiler/hrt.c
   3  *
   4  * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
   5  *
   6  * This program is free software; you can redistribute it and/or modify it
   7  * under the terms and conditions of the GNU General Public License,
   8  * version 2, as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13  * more details.
  14  *
  15  */
  16
  17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  18
  19 #include <linux/sched.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/slab.h>
  22 #include <linux/cpu.h>
  23 #include <linux/ptrace.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/err.h>
  26 #include <linux/nsproxy.h>
  27 #include <clocksource/arm_arch_timer.h>
  28
  29 #include <asm/cputype.h>
  30 #include <asm/irq_regs.h>
  31 #include <asm/arch_timer.h>
  32
  33 #include <linux/tegra_profiler.h>
  34
  35 #include "quadd.h"
  36 #include "hrt.h"
  37 #include "comm.h"
  38 #include "mmap.h"
  39 #include "ma.h"
  40 #include "power_clk.h"
  41 #include "tegra.h"
  42 #include "debug.h"
  43
  44 static struct quadd_hrt_ctx hrt;
  45
  46 static void
  47 read_all_sources(struct pt_regs *regs, struct task_struct *task);
  48
  49 struct hrt_event_value {
  50         int event_id;
  51         u32 value;
  52 };
  53
  54 static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
  55 {
  56         struct pt_regs *regs;
  57
  58         regs = get_irq_regs();
  59
  60         if (!hrt.active)
  61                 return HRTIMER_NORESTART;
  62
  63         qm_debug_handler_sample(regs);
  64
  65         if (regs)
  66                 read_all_sources(regs, NULL);
  67
  68         hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
  69         qm_debug_timer_forward(regs, hrt.sample_period);
  70
  71         return HRTIMER_RESTART;
  72 }
  73
  74 static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
  75 {
  76         u64 period = hrt.sample_period;
  77
  78         __hrtimer_start_range_ns(&cpu_ctx->hrtimer,
  79                                  ns_to_ktime(period), 0,
  80                                  HRTIMER_MODE_REL_PINNED, 0);
  81         qm_debug_timer_start(NULL, period);
  82 }
  83
  84 static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
  85 {
  86         hrtimer_cancel(&cpu_ctx->hrtimer);
  87         qm_debug_timer_cancel();
  88 }
  89
  90 static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
  91 {
  92         hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  93         cpu_ctx->hrtimer.function = hrtimer_handler;
  94 }
  95
  96 static inline u64 get_posix_clock_monotonic_time(void)
  97 {
  98         struct timespec ts;
  99
 100         do_posix_clock_monotonic_gettime(&ts);
 101         return timespec_to_ns(&ts);
 102 }
 103
 104 static inline u64 get_arch_time(struct timecounter *tc)
 105 {
 106         cycle_t value;
 107         const struct cyclecounter *cc = tc->cc;
 108
 109         value = cc->read(cc);
 110         return cyclecounter_cyc2ns(cc, value);
 111 }
 112
 113 u64 quadd_get_time(void)
 114 {
 115         struct timecounter *tc = hrt.tc;
 116
 117         return (tc && hrt.use_arch_timer) ?
 118                 get_arch_time(tc) :
 119                 get_posix_clock_monotonic_time();
 120 }
 121
 122 static void
 123 put_sample_cpu(struct quadd_record_data *data,
 124                struct quadd_iovec *vec,
 125                int vec_count, int cpu_id)
 126 {
 127         ssize_t err;
 128         struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
 129
 130         err = comm->put_sample(data, vec, vec_count, cpu_id);
 131         if (err < 0)
 132                 atomic64_inc(&hrt.skipped_samples);
 133
 134         atomic64_inc(&hrt.counter_samples);
 135 }
 136
 137 void
 138 quadd_put_sample(struct quadd_record_data *data,
 139                  struct quadd_iovec *vec, int vec_count)
 140 {
 141         put_sample_cpu(data, vec, vec_count, -1);
 142 }
 143
 144 static void put_header(void)
 145 {
 146         int cpu_id;
 147         int nr_events = 0, max_events = QUADD_MAX_COUNTERS;
 148         int events[QUADD_MAX_COUNTERS];
 149         struct quadd_record_data record;
 150         struct quadd_header_data *hdr = &record.hdr;
 151         struct quadd_parameters *param = &hrt.quadd_ctx->param;
 152         unsigned int extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
 153         struct quadd_iovec vec;
 154         struct quadd_ctx *ctx = hrt.quadd_ctx;
 155         struct quadd_event_source_interface *pmu = ctx->pmu;
 156         struct quadd_event_source_interface *pl310 = ctx->pl310;
 157
 158         record.record_type = QUADD_RECORD_TYPE_HEADER;
 159
 160         hdr->magic = QUADD_HEADER_MAGIC;
 161         hdr->version = QUADD_SAMPLES_VERSION;
 162
 163         hdr->backtrace = param->backtrace;
 164         hdr->use_freq = param->use_freq;
 165         hdr->system_wide = param->system_wide;
 166
 167         /* TODO: dynamically */
 168 #ifdef QM_DEBUG_SAMPLES_ENABLE
 169         hdr->debug_samples = 1;
 170 #else
 171         hdr->debug_samples = 0;
 172 #endif
 173
 174         hdr->freq = param->freq;
 175         hdr->ma_freq = param->ma_freq;
 176         hdr->power_rate_freq = param->power_rate_freq;
 177
 178         hdr->power_rate = hdr->power_rate_freq > 0 ? 1 : 0;
 179         hdr->get_mmap = (extra & QUADD_PARAM_EXTRA_GET_MMAP) ? 1 : 0;
 180
 181         hdr->reserved = 0;
 182         hdr->extra_length = 0;
 183
 184         hdr->reserved |= hrt.unw_method << QUADD_HDR_UNW_METHOD_SHIFT;
 185
 186         if (hrt.use_arch_timer)
 187                 hdr->reserved |= QUADD_HDR_USE_ARCH_TIMER;
 188
 189         if (hrt.get_stack_offset)
 190                 hdr->reserved |= QUADD_HDR_STACK_OFFSET;
 191
 192         if (pmu)
 193                 nr_events += pmu->get_current_events(events, max_events);
 194
 195         if (pl310)
 196                 nr_events += pl310->get_current_events(events + nr_events,
 197                                                        max_events - nr_events);
 198
 199         hdr->nr_events = nr_events;
 200
 201         vec.base = events;
 202         vec.len = nr_events * sizeof(events[0]);
 203
 204         for_each_possible_cpu(cpu_id)
 205                 put_sample_cpu(&record, &vec, 1, cpu_id);
 206 }
 207
 208 static void
 209 put_sched_sample(struct task_struct *task, int is_sched_in)
 210 {
 211         unsigned int cpu, flags;
 212         struct quadd_record_data record;
 213         struct quadd_sched_data *s = &record.sched;
 214
 215         record.record_type = QUADD_RECORD_TYPE_SCHED;
 216
 217         cpu = quadd_get_processor_id(NULL, &flags);
 218         s->cpu = cpu;
 219         s->lp_mode = (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
 220
 221         s->sched_in = is_sched_in ? 1 : 0;
 222         s->time = quadd_get_time();
 223         s->pid = task->pid;
 224
 225         s->reserved = 0;
 226
 227         s->data[0] = 0;
 228         s->data[1] = 0;
 229
 230         quadd_put_sample(&record, NULL, 0);
 231 }
 232
 233 static int get_sample_data(struct quadd_sample_data *sample,
 234                            struct pt_regs *regs,
 235                            struct task_struct *task)
 236 {
 237         unsigned int cpu, flags;
 238         struct quadd_ctx *quadd_ctx = hrt.quadd_ctx;
 239
 240         cpu = quadd_get_processor_id(regs, &flags);
 241         sample->cpu = cpu;
 242
 243         sample->lp_mode =
 244                 (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
 245         sample->thumb_mode = (flags & QUADD_CPUMODE_THUMB) ? 1 : 0;
 246         sample->user_mode = user_mode(regs) ? 1 : 0;
 247
 248         /* For security reasons, hide IPs from the kernel space. */
 249         if (!sample->user_mode && !quadd_ctx->collect_kernel_ips)
 250                 sample->ip = 0;
 251         else
 252                 sample->ip = instruction_pointer(regs);
 253
 254         sample->time = quadd_get_time();
 255         sample->reserved = 0;
 256         sample->pid = task->pid;
 257         sample->in_interrupt = in_interrupt() ? 1 : 0;
 258
 259         return 0;
 260 }
 261
 262 static int read_source(struct quadd_event_source_interface *source,
 263                        struct pt_regs *regs,
 264                        struct hrt_event_value *events_vals,
 265                        int max_events)
 266 {
 267         int nr_events, i;
 268         u32 prev_val, val, res_val;
 269         struct event_data events[QUADD_MAX_COUNTERS];
 270
 271         if (!source)
 272                 return 0;
 273
 274         max_events = min_t(int, max_events, QUADD_MAX_COUNTERS);
 275         nr_events = source->read(events, max_events);
 276
 277         for (i = 0; i < nr_events; i++) {
 278                 struct event_data *s = &events[i];
 279
 280                 prev_val = s->prev_val;
 281                 val = s->val;
 282
 283                 if (prev_val <= val)
 284                         res_val = val - prev_val;
 285                 else
 286                         res_val = QUADD_U32_MAX - prev_val + val;
 287
 288                 if (s->event_source == QUADD_EVENT_SOURCE_PL310) {
 289                         int nr_active = atomic_read(&hrt.nr_active_all_core);
 290                         if (nr_active > 1)
 291                                 res_val /= nr_active;
 292                 }
 293
 294                 events_vals[i].event_id = s->event_id;
 295                 events_vals[i].value = res_val;
 296         }
 297
 298         return nr_events;
 299 }
 300
 301 static long
 302 get_stack_offset(struct task_struct *task,
 303                  struct pt_regs *regs,
 304                  struct quadd_callchain *cc)
 305 {
 306         unsigned long sp;
 307         struct vm_area_struct *vma;
 308         struct mm_struct *mm = task->mm;
 309
 310         if (!regs || !mm)
 311                 return -ENOMEM;
 312
 313         sp = cc->nr > 0 ? cc->curr_sp :
 314                 quadd_user_stack_pointer(regs);
 315
 316         vma = find_vma(mm, sp);
 317         if (!vma)
 318                 return -ENOMEM;
 319
 320         return vma->vm_end - sp;
 321 }
 322
 323 static void
 324 read_all_sources(struct pt_regs *regs, struct task_struct *task)
 325 {
 326         u32 state, extra_data = 0;
 327         int i, vec_idx = 0, bt_size = 0;
 328         int nr_events = 0, nr_positive_events = 0;
 329         struct pt_regs *user_regs;
 330         struct quadd_iovec vec[5];
 331         struct hrt_event_value events[QUADD_MAX_COUNTERS];
 332         u32 events_extra[QUADD_MAX_COUNTERS];
 333
 334         struct quadd_record_data record_data;
 335         struct quadd_sample_data *s = &record_data.sample;
 336
 337         struct quadd_ctx *ctx = hrt.quadd_ctx;
 338         struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 339         struct quadd_callchain *cc = &cpu_ctx->cc;
 340
 341         if (!regs)
 342                 return;
 343
 344         if (atomic_read(&cpu_ctx->nr_active) == 0)
 345                 return;
 346
 347         if (!task)
 348                 task = current;
 349
 350         rcu_read_lock();
 351         if (!task_nsproxy(task)) {
 352                 rcu_read_unlock();
 353                 return;
 354         }
 355         rcu_read_unlock();
 356
 357         if (ctx->pmu && ctx->pmu_info.active)
 358                 nr_events += read_source(ctx->pmu, regs,
 359                                          events, QUADD_MAX_COUNTERS);
 360
 361         if (ctx->pl310 && ctx->pl310_info.active)
 362                 nr_events += read_source(ctx->pl310, regs,
 363                                          events + nr_events,
 364                                          QUADD_MAX_COUNTERS - nr_events);
 365
 366         if (!nr_events)
 367                 return;
 368
 369         if (user_mode(regs))
 370                 user_regs = regs;
 371         else
 372                 user_regs = current_pt_regs();
 373
 374         if (get_sample_data(s, regs, task))
 375                 return;
 376
 377         vec[vec_idx].base = &extra_data;
 378         vec[vec_idx].len = sizeof(extra_data);
 379         vec_idx++;
 380
 381         s->reserved = 0;
 382
 383         cc->nr = 0;
 384         cc->curr_sp = 0;
 385         cc->curr_fp = 0;
 386         cc->curr_pc = 0;
 387
 388         if (ctx->param.backtrace) {
 389                 cc->unw_method = hrt.unw_method;
 390                 bt_size = quadd_get_user_callchain(user_regs, cc, ctx, task);
 391
 392                 if (!bt_size && !user_mode(regs)) {
 393                         unsigned long pc = instruction_pointer(user_regs);
 394
 395                         cc->nr = 0;
 396 #ifdef CONFIG_ARM64
 397                         cc->cs_64 = compat_user_mode(user_regs) ? 0 : 1;
 398 #else
 399                         cc->cs_64 = 0;
 400 #endif
 401                         bt_size += quadd_callchain_store(cc, pc,
 402                                                          QUADD_UNW_TYPE_KCTX);
 403                 }
 404
 405                 if (bt_size > 0) {
 406                         int ip_size = cc->cs_64 ? sizeof(u64) : sizeof(u32);
 407                         int nr_types = DIV_ROUND_UP(bt_size, 8);
 408
 409                         vec[vec_idx].base = cc->cs_64 ?
 410                                 (void *)cc->ip_64 : (void *)cc->ip_32;
 411                         vec[vec_idx].len = bt_size * ip_size;
 412                         vec_idx++;
 413
 414                         vec[vec_idx].base = cc->types;
 415                         vec[vec_idx].len = nr_types * sizeof(cc->types[0]);
 416                         vec_idx++;
 417
 418                         if (cc->cs_64)
 419                                 extra_data |= QUADD_SED_IP64;
 420                 }
 421
 422                 extra_data |= cc->unw_method << QUADD_SED_UNW_METHOD_SHIFT;
 423                 s->reserved |= cc->unw_rc << QUADD_SAMPLE_URC_SHIFT;
 424         }
 425         s->callchain_nr = bt_size;
 426
 427         if (hrt.get_stack_offset) {
 428                 long offset = get_stack_offset(task, user_regs, cc);
 429                 if (offset > 0) {
 430                         u32 off = offset >> 2;
 431                         off = min_t(u32, off, 0xffff);
 432                         extra_data |= off << QUADD_SED_STACK_OFFSET_SHIFT;
 433                 }
 434         }
 435
 436         record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
 437
 438         s->events_flags = 0;
 439         for (i = 0; i < nr_events; i++) {
 440                 u32 value = events[i].value;
 441                 if (value > 0) {
 442                         s->events_flags |= 1 << i;
 443                         events_extra[nr_positive_events++] = value;
 444                 }
 445         }
 446
 447         if (nr_positive_events == 0)
 448                 return;
 449
 450         vec[vec_idx].base = events_extra;
 451         vec[vec_idx].len = nr_positive_events * sizeof(events_extra[0]);
 452         vec_idx++;
 453
 454         state = task->state;
 455         if (state) {
 456                 s->state = 1;
 457                 vec[vec_idx].base = &state;
 458                 vec[vec_idx].len = sizeof(state);
 459                 vec_idx++;
 460         } else {
 461                 s->state = 0;
 462         }
 463
 464         quadd_put_sample(&record_data, vec, vec_idx);
 465 }
 466
 467 static inline int
 468 is_profile_process(struct task_struct *task)
 469 {
 470         int i;
 471         pid_t pid, profile_pid;
 472         struct quadd_ctx *ctx = hrt.quadd_ctx;
 473
 474         if (!task)
 475                 return 0;
 476
 477         pid = task->tgid;
 478
 479         for (i = 0; i < ctx->param.nr_pids; i++) {
 480                 profile_pid = ctx->param.pids[i];
 481                 if (profile_pid == pid)
 482                         return 1;
 483         }
 484         return 0;
 485 }
 486
 487 static int
 488 add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
 489 {
 490         struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
 491
 492         if (t_data->pid > 0 ||
 493                 atomic_read(&cpu_ctx->nr_active) > 0) {
 494                 pr_warn_once("Warning for thread: %d\n", (int)pid);
 495                 return 0;
 496         }
 497
 498         t_data->pid = pid;
 499         t_data->tgid = tgid;
 500         return 1;
 501 }
 502
 503 static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
 504 {
 505         struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
 506
 507         if (t_data->pid < 0)
 508                 return 0;
 509
 510         if (t_data->pid == pid) {
 511                 t_data->pid = -1;
 512                 t_data->tgid = -1;
 513                 return 1;
 514         }
 515
 516         pr_warn_once("Warning for thread: %d\n", (int)pid);
 517         return 0;
 518 }
 519
 520 void __quadd_task_sched_in(struct task_struct *prev,
 521                            struct task_struct *task)
 522 {
 523         struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 524         struct quadd_ctx *ctx = hrt.quadd_ctx;
 525         struct event_data events[QUADD_MAX_COUNTERS];
 526         /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
 527
 528         if (likely(!hrt.active))
 529                 return;
 530 /*
 531         if (__ratelimit(&ratelimit_state))
 532                 pr_info("sch_in, cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n",
 533                         smp_processor_id(), (unsigned int)prev->pid,
 534                         (unsigned int)prev->tgid, (unsigned int)task->pid,
 535                         (unsigned int)task->tgid);
 536 */
 537
 538         if (is_profile_process(task)) {
 539                 put_sched_sample(task, 1);
 540
 541                 add_active_thread(cpu_ctx, task->pid, task->tgid);
 542                 atomic_inc(&cpu_ctx->nr_active);
 543
 544                 if (atomic_read(&cpu_ctx->nr_active) == 1) {
 545                         if (ctx->pmu)
 546                                 ctx->pmu->start();
 547
 548                         if (ctx->pl310)
 549                                 ctx->pl310->read(events, 1);
 550
 551                         start_hrtimer(cpu_ctx);
 552                         atomic_inc(&hrt.nr_active_all_core);
 553                 }
 554         }
 555 }
 556
 557 void __quadd_task_sched_out(struct task_struct *prev,
 558                             struct task_struct *next)
 559 {
 560         int n;
 561         struct pt_regs *user_regs;
 562         struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 563         struct quadd_ctx *ctx = hrt.quadd_ctx;
 564         /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
 565
 566         if (likely(!hrt.active))
 567                 return;
 568 /*
 569         if (__ratelimit(&ratelimit_state))
 570                 pr_info("sch_out: cpu: %d, prev: %u (%u) \t--> next: %u (%u)\n",
 571                         smp_processor_id(), (unsigned int)prev->pid,
 572                         (unsigned int)prev->tgid, (unsigned int)next->pid,
 573                         (unsigned int)next->tgid);
 574 */
 575
 576         if (is_profile_process(prev)) {
 577                 user_regs = task_pt_regs(prev);
 578                 if (user_regs)
 579                         read_all_sources(user_regs, prev);
 580
 581                 n = remove_active_thread(cpu_ctx, prev->pid);
 582                 atomic_sub(n, &cpu_ctx->nr_active);
 583
 584                 if (n && atomic_read(&cpu_ctx->nr_active) == 0) {
 585                         cancel_hrtimer(cpu_ctx);
 586                         atomic_dec(&hrt.nr_active_all_core);
 587
 588                         if (ctx->pmu)
 589                                 ctx->pmu->stop();
 590                 }
 591
 592                 put_sched_sample(prev, 0);
 593         }
 594 }
 595
 596 void __quadd_event_mmap(struct vm_area_struct *vma)
 597 {
 598         struct quadd_parameters *param;
 599
 600         if (likely(!hrt.active))
 601                 return;
 602
 603         if (!is_profile_process(current))
 604                 return;
 605
 606         param = &hrt.quadd_ctx->param;
 607         quadd_process_mmap(vma, param->pids[0]);
 608 }
 609
 610 static void reset_cpu_ctx(void)
 611 {
 612         int cpu_id;
 613         struct quadd_cpu_context *cpu_ctx;
 614         struct quadd_thread_data *t_data;
 615
 616         for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
 617                 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
 618                 t_data = &cpu_ctx->active_thread;
 619
 620                 atomic_set(&cpu_ctx->nr_active, 0);
 621
 622                 t_data->pid = -1;
 623                 t_data->tgid = -1;
 624         }
 625 }
 626
 627 int quadd_hrt_start(void)
 628 {
 629         int err;
 630         u64 period;
 631         long freq;
 632         unsigned int extra;
 633         struct quadd_ctx *ctx = hrt.quadd_ctx;
 634         struct quadd_parameters *param = &ctx->param;
 635
 636         freq = ctx->param.freq;
 637         freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
 638         period = NSEC_PER_SEC / freq;
 639         hrt.sample_period = period;
 640
 641         if (ctx->param.ma_freq > 0)
 642                 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
 643         else
 644                 hrt.ma_period = 0;
 645
 646         atomic64_set(&hrt.counter_samples, 0);
 647         atomic64_set(&hrt.skipped_samples, 0);
 648
 649         reset_cpu_ctx();
 650
 651         extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
 652
 653         if (extra & QUADD_PARAM_EXTRA_BT_MIXED)
 654                 hrt.unw_method = QUADD_UNW_METHOD_MIXED;
 655         else if (extra & QUADD_PARAM_EXTRA_BT_UNWIND_TABLES)
 656                 hrt.unw_method = QUADD_UNW_METHOD_EHT;
 657         else if (extra & QUADD_PARAM_EXTRA_BT_FP)
 658                 hrt.unw_method = QUADD_UNW_METHOD_FP;
 659         else
 660                 hrt.unw_method = QUADD_UNW_METHOD_NONE;
 661
 662         if (hrt.tc && (extra & QUADD_PARAM_EXTRA_USE_ARCH_TIMER))
 663                 hrt.use_arch_timer = 1;
 664         else
 665                 hrt.use_arch_timer = 0;
 666
 667         pr_info("timer: %s\n", hrt.use_arch_timer ? "arch" : "monotonic clock");
 668
 669         hrt.get_stack_offset =
 670                 (extra & QUADD_PARAM_EXTRA_STACK_OFFSET) ? 1 : 0;
 671
 672         put_header();
 673
 674         if (extra & QUADD_PARAM_EXTRA_GET_MMAP) {
 675                 err = quadd_get_current_mmap(param->pids[0]);
 676                 if (err) {
 677                         pr_err("error: quadd_get_current_mmap\n");
 678                         return err;
 679                 }
 680         }
 681
 682         if (ctx->pl310)
 683                 ctx->pl310->start();
 684
 685         quadd_ma_start(&hrt);
 686
 687         hrt.active = 1;
 688
 689         pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
 690         return 0;
 691 }
 692
 693 void quadd_hrt_stop(void)
 694 {
 695         struct quadd_ctx *ctx = hrt.quadd_ctx;
 696
 697         pr_info("Stop hrt, samples all/skipped: %llu/%llu\n",
 698                 atomic64_read(&hrt.counter_samples),
 699                 atomic64_read(&hrt.skipped_samples));
 700
 701         if (ctx->pl310)
 702                 ctx->pl310->stop();
 703
 704         quadd_ma_stop(&hrt);
 705
 706         hrt.active = 0;
 707
 708         atomic64_set(&hrt.counter_samples, 0);
 709         atomic64_set(&hrt.skipped_samples, 0);
 710
 711         /* reset_cpu_ctx(); */
 712 }
 713
 714 void quadd_hrt_deinit(void)
 715 {
 716         if (hrt.active)
 717                 quadd_hrt_stop();
 718
 719         free_percpu(hrt.cpu_ctx);
 720 }
 721
 722 void quadd_hrt_get_state(struct quadd_module_state *state)
 723 {
 724         state->nr_all_samples = atomic64_read(&hrt.counter_samples);
 725         state->nr_skipped_samples = atomic64_read(&hrt.skipped_samples);
 726 }
 727
 728 static void init_arch_timer(void)
 729 {
 730         u32 cntkctl = arch_timer_get_cntkctl();
 731
 732         if (cntkctl & ARCH_TIMER_USR_VCT_ACCESS_EN)
 733                 hrt.tc = arch_timer_get_timecounter();
 734         else
 735                 hrt.tc = NULL;
 736 }
 737
 738 struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
 739 {
 740         int cpu_id;
 741         u64 period;
 742         long freq;
 743         struct quadd_cpu_context *cpu_ctx;
 744
 745         hrt.quadd_ctx = ctx;
 746         hrt.active = 0;
 747
 748         freq = ctx->param.freq;
 749         freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
 750         period = NSEC_PER_SEC / freq;
 751         hrt.sample_period = period;
 752
 753         if (ctx->param.ma_freq > 0)
 754                 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
 755         else
 756                 hrt.ma_period = 0;
 757
 758         atomic64_set(&hrt.counter_samples, 0);
 759         init_arch_timer();
 760
 761         hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
 762         if (!hrt.cpu_ctx)
 763                 return ERR_PTR(-ENOMEM);
 764
 765         for_each_possible_cpu(cpu_id) {
 766                 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
 767
 768                 atomic_set(&cpu_ctx->nr_active, 0);
 769
 770                 cpu_ctx->active_thread.pid = -1;
 771                 cpu_ctx->active_thread.tgid = -1;
 772
 773                 cpu_ctx->cc.hrt = &hrt;
 774
 775                 init_hrtimer(cpu_ctx);
 776         }
 777
 778         return &hrt;
 779 }