2 * drivers/misc/tegra-profiler/hrt.c
4 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 #include <linux/sched.h>
20 #include <linux/hrtimer.h>
21 #include <linux/slab.h>
22 #include <linux/cpu.h>
23 #include <linux/ptrace.h>
24 #include <linux/interrupt.h>
25 #include <linux/err.h>
26 #include <clocksource/arm_arch_timer.h>
28 #include <asm/cputype.h>
29 #include <asm/irq_regs.h>
30 #include <asm/arch_timer.h>
32 #include <linux/tegra_profiler.h>
39 #include "power_clk.h"
/* Single module-wide context for the hrtimer-based sampling engine. */
43 static struct quadd_hrt_ctx hrt;

/* Forward declaration: sample every active event source for @task. */
46 read_all_sources(struct pt_regs *regs, struct task_struct *task);

/* One counter reading (members not visible in this chunk of the file). */
48 struct hrt_event_value {
/*
 * Fold a task's run state and exit state into one u32 by OR-ing the
 * two bitmasks; recorded verbatim into sched/sample records.
 */
53 static inline u32 get_task_state(struct task_struct *task)
55 return (u32)(task->state | task->exit_state);
/*
 * Per-CPU hrtimer callback: take one profiling sample of the current
 * task, then re-arm the timer for the next sampling period.  Returns
 * HRTIMER_NORESTART once profiling has been switched off.
 */
58 static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
62 regs = get_irq_regs();
/* Profiling stopped: let the timer die instead of re-arming it. */
64 if (!atomic_read(&hrt.active))
65 return HRTIMER_NORESTART;
67 qm_debug_handler_sample(regs);
70 read_all_sources(regs, current);
/* Advance the expiry by one sample period from now. */
72 hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
73 qm_debug_timer_forward(regs, hrt.sample_period);
75 return HRTIMER_RESTART;
78 static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
80 u64 period = hrt.sample_period;
82 __hrtimer_start_range_ns(&cpu_ctx->hrtimer,
83 ns_to_ktime(period), 0,
84 HRTIMER_MODE_REL_PINNED, 0);
85 qm_debug_timer_start(NULL, period);
/* Stop this CPU's sampling timer (waits for a running handler). */
88 static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
90 hrtimer_cancel(&cpu_ctx->hrtimer);
91 qm_debug_timer_cancel();
/* One-time setup of the per-CPU timer; callback is hrtimer_handler(). */
94 static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
96 hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
97 cpu_ctx->hrtimer.function = hrtimer_handler;
/* Current CLOCK_MONOTONIC time, converted to nanoseconds. */
100 static inline u64 get_posix_clock_monotonic_time(void)
104 do_posix_clock_monotonic_gettime(&ts);
105 return timespec_to_ns(&ts);
/*
 * Read the raw cycle counter behind @tc and convert the cycle count
 * to nanoseconds using the counter's own cyc2ns parameters.
 */
108 static inline u64 get_arch_time(struct timecounter *tc)
111 const struct cyclecounter *cc = tc->cc;
113 value = cc->read(cc);
114 return cyclecounter_cyc2ns(cc, value);
/*
 * Timestamp source for all profiler records: the arch timer when it is
 * available and selected, otherwise the monotonic POSIX clock.
 */
117 u64 quadd_get_time(void)
119 struct timecounter *tc = hrt.tc;
121 return (tc && hrt.use_arch_timer) ?
123 get_posix_clock_monotonic_time();
/*
 * Push one record plus its iovec payload through the comm interface.
 * Bumps skipped_samples on failure, counter_samples on success.
 * cpu_id of -1 means "this CPU" (see quadd_put_sample_this_cpu()).
 */
127 __put_sample(struct quadd_record_data *data,
128 struct quadd_iovec *vec,
129 int vec_count, int cpu_id)
132 struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
134 err = comm->put_sample(data, vec, vec_count, cpu_id);
136 atomic64_inc(&hrt.skipped_samples);
138 atomic64_inc(&hrt.counter_samples);
/* Emit a record attributed to the current CPU (cpu_id == -1). */
142 quadd_put_sample_this_cpu(struct quadd_record_data *data,
143 struct quadd_iovec *vec, int vec_count)
145 __put_sample(data, vec, vec_count, -1);
/* Emit a record on the default channel (cpu_id == 0). */
149 quadd_put_sample(struct quadd_record_data *data,
150 struct quadd_iovec *vec, int vec_count)
152 __put_sample(data, vec, vec_count, 0);
/*
 * Build and emit the per-CPU stream header record: magic/version,
 * the session parameters, capability flags in hdr->reserved, and the
 * list of currently-configured event ids for this CPU.
 */
155 static void put_header(int cpuid)
157 int nr_events = 0, max_events = QUADD_MAX_COUNTERS;
158 int events[QUADD_MAX_COUNTERS];
159 struct quadd_record_data record;
160 struct quadd_header_data *hdr = &record.hdr;
161 struct quadd_parameters *param = &hrt.quadd_ctx->param;
162 unsigned int extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
163 struct quadd_iovec vec[2];
164 struct quadd_ctx *ctx = hrt.quadd_ctx;
165 struct quadd_event_source_interface *pmu = ctx->pmu;
166 struct quadd_event_source_interface *pl310 = ctx->pl310;
167 u32 cpuid_data = cpuid;
169 record.record_type = QUADD_RECORD_TYPE_HEADER;
171 hdr->magic = QUADD_HEADER_MAGIC;
172 hdr->version = QUADD_SAMPLES_VERSION;
/* Copy session-wide options straight from the user parameters. */
174 hdr->backtrace = param->backtrace;
175 hdr->use_freq = param->use_freq;
176 hdr->system_wide = param->system_wide;
178 /* TODO: dynamically */
179 #ifdef QM_DEBUG_SAMPLES_ENABLE
180 hdr->debug_samples = 1;
182 hdr->debug_samples = 0;
185 hdr->freq = param->freq;
186 hdr->ma_freq = param->ma_freq;
187 hdr->power_rate_freq = param->power_rate_freq;
189 hdr->power_rate = hdr->power_rate_freq > 0 ? 1 : 0;
190 hdr->get_mmap = (extra & QUADD_PARAM_EXTRA_GET_MMAP) ? 1 : 0;
193 hdr->extra_length = 0;
/* Advertise which unwinding methods this session will use. */
195 if (hdr->backtrace) {
196 struct quadd_unw_methods *um = &hrt.um;
198 hdr->reserved |= um->fp ? QUADD_HDR_BT_FP : 0;
199 hdr->reserved |= um->ut ? QUADD_HDR_BT_UT : 0;
200 hdr->reserved |= um->ut_ce ? QUADD_HDR_BT_UT_CE : 0;
201 hdr->reserved |= um->dwarf ? QUADD_HDR_BT_DWARF : 0;
204 if (hrt.use_arch_timer)
205 hdr->reserved |= QUADD_HDR_USE_ARCH_TIMER;
207 if (hrt.get_stack_offset)
208 hdr->reserved |= QUADD_HDR_STACK_OFFSET;
210 hdr->reserved |= QUADD_HDR_HAS_CPUID;
/* Collect the configured event ids from the PMU, then PL310. */
213 nr_events += pmu->get_current_events(cpuid, events + nr_events,
214 max_events - nr_events);
217 nr_events += pl310->get_current_events(cpuid,
219 max_events - nr_events);
221 hdr->nr_events = nr_events;
/* Payload: vec[0] = event id list, vec[1] = cpuid trailer. */
223 vec[0].base = events;
224 vec[0].len = nr_events * sizeof(events[0]);
226 vec[1].base = &cpuid_data;
227 vec[1].len = sizeof(cpuid_data);
229 __put_sample(&record, &vec[0], 2, cpuid);
/*
 * Emit a scheduler event record for @task: direction (in/out),
 * timestamp, tgid, LP-cluster flag and packed task state.
 */
233 put_sched_sample(struct task_struct *task, int is_sched_in)
235 unsigned int cpu, flags;
236 struct quadd_record_data record;
237 struct quadd_sched_data *s = &record.sched;
239 record.record_type = QUADD_RECORD_TYPE_SCHED;
241 cpu = quadd_get_processor_id(NULL, &flags);
/* Mark whether this CPU is in the Tegra low-power cluster. */
243 s->lp_mode = (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
245 s->sched_in = is_sched_in ? 1 : 0;
246 s->time = quadd_get_time();
248 s->tgid = task->tgid;
252 s->data[QUADD_SCHED_IDX_TASK_STATE] = get_task_state(task);
253 s->data[QUADD_SCHED_IDX_RESERVED] = 0;
255 quadd_put_sample_this_cpu(&record, NULL, 0);
/*
 * Fill the fixed part of a sample record from @regs/@task: CPU mode
 * flags, instruction pointer, timestamp, pid/tgid, interrupt context.
 */
258 static int get_sample_data(struct quadd_sample_data *sample,
259 struct pt_regs *regs,
260 struct task_struct *task)
262 unsigned int cpu, flags;
263 struct quadd_ctx *quadd_ctx = hrt.quadd_ctx;
265 cpu = quadd_get_processor_id(regs, &flags);
269 (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
270 sample->thumb_mode = (flags & QUADD_CPUMODE_THUMB) ? 1 : 0;
271 sample->user_mode = user_mode(regs) ? 1 : 0;
273 /* For security reasons, hide IPs from the kernel space. */
274 if (!sample->user_mode && !quadd_ctx->collect_kernel_ips)
277 sample->ip = instruction_pointer(regs);
279 sample->time = quadd_get_time();
280 sample->reserved = 0;
281 sample->pid = task->pid;
282 sample->tgid = task->tgid;
283 sample->in_interrupt = in_interrupt() ? 1 : 0;
/*
 * Read up to @max_events counters from @source and convert each raw
 * reading into a delta since the previous reading, handling 32-bit
 * counter wraparound.  PL310 counters are L2-wide, so their delta is
 * divided by the number of CPUs currently being sampled.
 */
288 static int read_source(struct quadd_event_source_interface *source,
289 struct pt_regs *regs,
290 struct hrt_event_value *events_vals,
294 u32 prev_val, val, res_val;
295 struct event_data events[QUADD_MAX_COUNTERS];
300 max_events = min_t(int, max_events, QUADD_MAX_COUNTERS);
301 nr_events = source->read(events, max_events);
303 for (i = 0; i < nr_events; i++) {
304 struct event_data *s = &events[i];
306 prev_val = s->prev_val;
/* Delta since last read; second form handles counter wraparound. */
310 res_val = val - prev_val;
312 res_val = QUADD_U32_MAX - prev_val + val;
/* Shared L2 counter: apportion the delta across active cores. */
314 if (s->event_source == QUADD_EVENT_SOURCE_PL310) {
315 int nr_active = atomic_read(&hrt.nr_active_all_core);
318 res_val /= nr_active;
321 events_vals[i].event_id = s->event_id;
322 events_vals[i].value = res_val;
/*
 * Distance in bytes from the task's current user stack pointer (or the
 * deepest SP seen by the unwinder, if a callchain was produced) to the
 * end of the VMA containing it.
 */
329 get_stack_offset(struct task_struct *task,
330 struct pt_regs *regs,
331 struct quadd_callchain *cc)
334 struct vm_area_struct *vma;
335 struct mm_struct *mm = task->mm;
/* Prefer the unwinder's last SP when a backtrace exists. */
340 sp = cc->nr > 0 ? cc->curr_sp :
341 quadd_user_stack_pointer(regs);
343 vma = find_vma(mm, sp);
347 return vma->vm_end - sp;
/*
 * Core sampling routine: read every active event source (PMU, PL310),
 * optionally unwind the user callchain, then assemble and emit one
 * sample record with a variable iovec payload (extra-data word,
 * callchain IPs + unwind-type bytes, unwind result codes, positive
 * counter values, task state).
 */
351 read_all_sources(struct pt_regs *regs, struct task_struct *task)
353 u32 state, extra_data = 0, urcs = 0;
354 int i, vec_idx = 0, bt_size = 0;
355 int nr_events = 0, nr_positive_events = 0;
356 struct pt_regs *user_regs;
357 struct quadd_iovec vec[6];
358 struct hrt_event_value events[QUADD_MAX_COUNTERS];
359 u32 events_extra[QUADD_MAX_COUNTERS];
361 struct quadd_record_data record_data;
362 struct quadd_sample_data *s = &record_data.sample;
364 struct quadd_ctx *ctx = hrt.quadd_ctx;
365 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
366 struct quadd_callchain *cc = &cpu_ctx->cc;
/* Nothing to sample on this CPU, or the task is going away. */
368 if (atomic_read(&cpu_ctx->nr_active) == 0)
371 if (task->flags & PF_EXITING)
/* Gather counter deltas from each enabled source. */
374 if (ctx->pmu && ctx->get_pmu_info()->active)
375 nr_events += read_source(ctx->pmu, regs,
376 events, QUADD_MAX_COUNTERS);
378 if (ctx->pl310 && ctx->pl310_info.active)
379 nr_events += read_source(ctx->pl310, regs,
381 QUADD_MAX_COUNTERS - nr_events);
389 user_regs = current_pt_regs();
391 if (get_sample_data(s, regs, task))
/* vec[0]: extra-data word (filled in below, appended by index). */
394 vec[vec_idx].base = &extra_data;
395 vec[vec_idx].len = sizeof(extra_data);
406 if (ctx->param.backtrace) {
409 bt_size = quadd_get_user_callchain(user_regs, cc, ctx, task);
/* Unwind failed from kernel context: fall back to the user PC. */
411 if (!bt_size && !user_mode(regs)) {
412 unsigned long pc = instruction_pointer(user_regs);
416 cc->cs_64 = compat_user_mode(user_regs) ? 0 : 1;
420 bt_size += quadd_callchain_store(cc, pc,
421 QUADD_UNW_TYPE_KCTX);
/* Append callchain IPs (32- or 64-bit) and packed unwind types. */
425 int ip_size = cc->cs_64 ? sizeof(u64) : sizeof(u32);
426 int nr_types = DIV_ROUND_UP(bt_size, 8);
428 vec[vec_idx].base = cc->cs_64 ?
429 (void *)cc->ip_64 : (void *)cc->ip_32;
430 vec[vec_idx].len = bt_size * ip_size;
433 vec[vec_idx].base = cc->types;
434 vec[vec_idx].len = nr_types * sizeof(cc->types[0]);
438 extra_data |= QUADD_SED_IP64;
/* Pack per-method unwind result codes into one word. */
441 urcs |= (cc->urc_fp & QUADD_SAMPLE_URC_MASK) <<
442 QUADD_SAMPLE_URC_SHIFT_FP;
443 urcs |= (cc->urc_ut & QUADD_SAMPLE_URC_MASK) <<
444 QUADD_SAMPLE_URC_SHIFT_UT;
445 urcs |= (cc->urc_dwarf & QUADD_SAMPLE_URC_MASK) <<
446 QUADD_SAMPLE_URC_SHIFT_DWARF;
448 s->reserved |= QUADD_SAMPLE_RES_URCS_ENABLED;
450 vec[vec_idx].base = &urcs;
451 vec[vec_idx].len = sizeof(urcs);
454 s->callchain_nr = bt_size;
/* Optionally encode stack depth (in words, capped) into extra_data. */
456 if (hrt.get_stack_offset) {
457 long offset = get_stack_offset(task, user_regs, cc);
460 u32 off = offset >> 2;
462 off = min_t(u32, off, 0xffff);
463 extra_data |= off << QUADD_SED_STACK_OFFSET_SHIFT;
467 record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
/* Keep only non-zero counter values; flag their slots in events_flags. */
470 for (i = 0; i < nr_events; i++) {
471 u32 value = events[i].value;
474 s->events_flags |= 1 << i;
475 events_extra[nr_positive_events++] = value;
479 if (nr_positive_events == 0)
482 vec[vec_idx].base = events_extra;
483 vec[vec_idx].len = nr_positive_events * sizeof(events_extra[0]);
486 state = get_task_state(task);
489 vec[vec_idx].base = &state;
490 vec[vec_idx].len = sizeof(state);
496 quadd_put_sample_this_cpu(&record_data, vec, vec_idx);
/*
 * True when @task belongs to the set of pids this session was asked
 * to profile (linear scan over ctx->param.pids).
 */
500 is_sample_process(struct task_struct *task)
503 pid_t pid, profile_pid;
504 struct quadd_ctx *ctx = hrt.quadd_ctx;
511 for (i = 0; i < ctx->param.nr_pids; i++) {
512 profile_pid = ctx->param.pids[i];
513 if (profile_pid == pid)
/* Predicate: is @task the idle/swapper task? (body not visible here) */
520 is_swapper_task(struct task_struct *task)
/*
 * Should sched events for @task be traced?  Swapper is excluded;
 * otherwise either everything is traced (trace_all_tasks) or only
 * the profiled pids.
 */
529 is_trace_process(struct task_struct *task)
531 struct quadd_ctx *ctx = hrt.quadd_ctx;
536 if (is_swapper_task(task))
539 if (ctx->param.trace_all_tasks)
542 return is_sample_process(task);
/*
 * Record @pid/@tgid as the single thread being sampled on this CPU.
 * Warns (once) if a thread is already registered here.
 */
546 add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
548 struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
550 if (t_data->pid > 0 ||
551 atomic_read(&cpu_ctx->nr_active) > 0) {
552 pr_warn_once("Warning for thread: %d\n", (int)pid);
/*
 * Clear this CPU's active-thread slot if it matches @pid; warns (once)
 * on mismatch.  Return value feeds atomic_sub() on nr_active.
 */
561 static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
563 struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
568 if (t_data->pid == pid) {
574 pr_warn_once("Warning for thread: %d\n", (int)pid);
/*
 * Scheduler hook, switch-in side.  Emits a sched record for traced
 * tasks; for profiled tasks, registers the thread on this CPU and, on
 * the first active thread, primes the PL310 counter and starts the
 * per-CPU sampling timer.
 */
578 void __quadd_task_sched_in(struct task_struct *prev,
579 struct task_struct *task)
581 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
582 struct quadd_ctx *ctx = hrt.quadd_ctx;
583 struct event_data events[QUADD_MAX_COUNTERS];
584 /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
/* Fast path: profiling off. */
586 if (likely(!atomic_read(&hrt.active)))
589 if (__ratelimit(&ratelimit_state))
590 pr_info("sch_in, cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n",
591 smp_processor_id(), (unsigned int)prev->pid,
592 (unsigned int)prev->tgid, (unsigned int)task->pid,
593 (unsigned int)task->tgid);
596 if (is_trace_process(task))
597 put_sched_sample(task, 1);
599 if (is_sample_process(task)) {
600 add_active_thread(cpu_ctx, task->pid, task->tgid);
601 atomic_inc(&cpu_ctx->nr_active);
/* First active thread on this CPU: begin sampling. */
603 if (atomic_read(&cpu_ctx->nr_active) == 1) {
608 ctx->pl310->read(events, 1);
610 start_hrtimer(cpu_ctx);
611 atomic_inc(&hrt.nr_active_all_core);
/*
 * Scheduler hook, switch-out side.  For a profiled @prev, takes one
 * final sample from its saved user registers, deregisters the thread,
 * and stops the per-CPU timer once no profiled thread remains; also
 * emits a sched-out record for traced tasks.
 */
616 void __quadd_task_sched_out(struct task_struct *prev,
617 struct task_struct *next)
620 struct pt_regs *user_regs;
621 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
622 struct quadd_ctx *ctx = hrt.quadd_ctx;
623 /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
/* Fast path: profiling off. */
625 if (likely(!atomic_read(&hrt.active)))
628 if (__ratelimit(&ratelimit_state))
629 pr_info("sch_out: cpu: %d, prev: %u (%u) \t--> next: %u (%u)\n",
630 smp_processor_id(), (unsigned int)prev->pid,
631 (unsigned int)prev->tgid, (unsigned int)next->pid,
632 (unsigned int)next->tgid);
635 if (is_sample_process(prev)) {
636 user_regs = task_pt_regs(prev);
638 read_all_sources(user_regs, prev);
640 n = remove_active_thread(cpu_ctx, prev->pid);
641 atomic_sub(n, &cpu_ctx->nr_active);
/* Last profiled thread left this CPU: stop sampling. */
643 if (n && atomic_read(&cpu_ctx->nr_active) == 0) {
644 cancel_hrtimer(cpu_ctx);
645 atomic_dec(&hrt.nr_active_all_core);
652 if (is_trace_process(prev))
653 put_sched_sample(prev, 0);
/*
 * mmap hook: record new mappings of a profiled process so IPs can be
 * resolved later.  No-op when profiling is off or the caller is not a
 * profiled task.
 */
656 void __quadd_event_mmap(struct vm_area_struct *vma)
658 struct quadd_parameters *param;
660 if (likely(!atomic_read(&hrt.active)))
663 if (!is_sample_process(current))
666 param = &hrt.quadd_ctx->param;
667 quadd_process_mmap(vma, param->pids[0]);
/* Reset every CPU's context: active counter and thread bookkeeping. */
670 static void reset_cpu_ctx(void)
673 struct quadd_cpu_context *cpu_ctx;
674 struct quadd_thread_data *t_data;
676 for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
677 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
678 t_data = &cpu_ctx->active_thread;
680 atomic_set(&cpu_ctx->nr_active, 0);
/*
 * Begin a profiling session: derive the sampling period from the
 * requested frequency (clamped to QUADD_HRT_MIN_FREQ), reset sample
 * counters, configure unwind methods and timer source from the extra
 * parameter bits, emit per-CPU headers, optionally snapshot existing
 * mmaps, start the moving-average worker, and finally mark the
 * session active.
 */
687 int quadd_hrt_start(void)
694 struct quadd_ctx *ctx = hrt.quadd_ctx;
695 struct quadd_parameters *param = &ctx->param;
/* period (ns) = 1s / freq, with freq bounded below. */
697 freq = ctx->param.freq;
698 freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
699 period = NSEC_PER_SEC / freq;
700 hrt.sample_period = period;
702 if (ctx->param.ma_freq > 0)
703 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
707 atomic64_set(&hrt.counter_samples, 0);
708 atomic64_set(&hrt.skipped_samples, 0);
712 extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
/* Decode which unwinding methods the client requested. */
714 if (param->backtrace) {
715 struct quadd_unw_methods *um = &hrt.um;
717 um->fp = extra & QUADD_PARAM_EXTRA_BT_FP ? 1 : 0;
718 um->ut = extra & QUADD_PARAM_EXTRA_BT_UT ? 1 : 0;
719 um->ut_ce = extra & QUADD_PARAM_EXTRA_BT_UT_CE ? 1 : 0;
720 um->dwarf = extra & QUADD_PARAM_EXTRA_BT_DWARF ? 1 : 0;
722 pr_info("unw methods: fp/ut/ut_ce/dwarf: %u/%u/%u/%u\n",
723 um->fp, um->ut, um->ut_ce, um->dwarf);
/* Arch timer only if available (hrt.tc) and requested. */
726 if (hrt.tc && (extra & QUADD_PARAM_EXTRA_USE_ARCH_TIMER))
727 hrt.use_arch_timer = 1;
729 hrt.use_arch_timer = 0;
731 pr_info("timer: %s\n", hrt.use_arch_timer ? "arch" : "monotonic clock");
733 hrt.get_stack_offset =
734 (extra & QUADD_PARAM_EXTRA_STACK_OFFSET) ? 1 : 0;
736 for_each_possible_cpu(cpuid)
739 if (extra & QUADD_PARAM_EXTRA_GET_MMAP) {
740 err = quadd_get_current_mmap(param->pids[0]);
742 pr_err("error: quadd_get_current_mmap\n");
750 quadd_ma_start(&hrt);
752 atomic_set(&hrt.active, 1);
754 pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
/*
 * Stop the profiling session: log totals, clear the active flag
 * (timers then self-cancel via hrtimer_handler), and zero counters.
 */
758 void quadd_hrt_stop(void)
760 struct quadd_ctx *ctx = hrt.quadd_ctx;
762 pr_info("Stop hrt, samples all/skipped: %llu/%llu\n",
763 atomic64_read(&hrt.counter_samples),
764 atomic64_read(&hrt.skipped_samples));
771 atomic_set(&hrt.active, 0);
773 atomic64_set(&hrt.counter_samples, 0);
774 atomic64_set(&hrt.skipped_samples, 0);
776 /* reset_cpu_ctx(); */
/* Module teardown: release the per-CPU contexts (stop first if active). */
779 void quadd_hrt_deinit(void)
781 if (atomic_read(&hrt.active))
784 free_percpu(hrt.cpu_ctx);
/* Export the total/skipped sample counters to @state. */
787 void quadd_hrt_get_state(struct quadd_module_state *state)
789 state->nr_all_samples = atomic64_read(&hrt.counter_samples);
790 state->nr_skipped_samples = atomic64_read(&hrt.skipped_samples);
/*
 * Adopt the arch timer's timecounter only when userspace access to the
 * virtual counter is enabled in CNTKCTL; otherwise hrt.tc stays unset
 * and quadd_get_time() falls back to the monotonic clock.
 */
793 static void init_arch_timer(void)
795 u32 cntkctl = arch_timer_get_cntkctl();
797 if (cntkctl & ARCH_TIMER_USR_VCT_ACCESS_EN)
798 hrt.tc = arch_timer_get_timecounter();
/*
 * One-time module init: compute the initial sampling period, allocate
 * the per-CPU contexts and initialize each CPU's timer/thread state.
 * Returns ERR_PTR(-ENOMEM) on allocation failure.
 * NOTE(review): the tail of this function is beyond this chunk.
 */
803 struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
808 struct quadd_cpu_context *cpu_ctx;
811 atomic_set(&hrt.active, 0);
/* Same period derivation as quadd_hrt_start(). */
813 freq = ctx->param.freq;
814 freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
815 period = NSEC_PER_SEC / freq;
816 hrt.sample_period = period;
818 if (ctx->param.ma_freq > 0)
819 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
823 atomic64_set(&hrt.counter_samples, 0);
826 hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
828 return ERR_PTR(-ENOMEM);
830 for_each_possible_cpu(cpu_id) {
831 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
833 atomic_set(&cpu_ctx->nr_active, 0);
/* -1 marks the active-thread slot as empty. */
835 cpu_ctx->active_thread.pid = -1;
836 cpu_ctx->active_thread.tgid = -1;
838 cpu_ctx->cc.hrt = &hrt;
840 init_hrtimer(cpu_ctx);