/*
 * drivers/misc/tegra-profiler/hrt.c
 *
 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 #include <linux/sched.h>
20 #include <linux/hrtimer.h>
21 #include <linux/slab.h>
22 #include <linux/cpu.h>
23 #include <linux/ptrace.h>
24 #include <linux/interrupt.h>
25 #include <linux/err.h>
26 #include <linux/nsproxy.h>
27 #include <clocksource/arm_arch_timer.h>
29 #include <asm/cputype.h>
30 #include <asm/irq_regs.h>
31 #include <asm/arch_timer.h>
33 #include <linux/tegra_profiler.h>
40 #include "power_clk.h"
/* Single global context for hrtimer-based profiling in this driver. */
44 static struct quadd_hrt_ctx hrt;
/*
 * Forward declaration; defined later in this file.
 * NOTE(review): the return-type line is not visible in this excerpt.
 */
47 read_all_sources(struct pt_regs *regs, struct task_struct *task);
/*
 * Per-counter (event id, delta value) pair produced by read_source().
 * NOTE(review): the struct body is not visible in this excerpt.
 */
49 struct hrt_event_value {
/*
 * Per-CPU sampling timer callback: capture one profiling sample from
 * the interrupted context, then re-arm the timer one period ahead.
 * NOTE(review): garbled partial paste -- the declaration of 'regs' and
 * the condition guarding the NORESTART path are missing here.
 */
54 static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
/* Register state of the context this timer interrupt preempted. */
58 regs = get_irq_regs();
/* Taken when sampling cannot proceed: stop this timer. */
61 return HRTIMER_NORESTART;
63 qm_debug_handler_sample(regs);
/* task == NULL: sample whatever task is current on this CPU. */
66 read_all_sources(regs, NULL);
/* Push the expiry forward by one sampling period relative to now. */
68 hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
69 qm_debug_timer_forward(regs, hrt.sample_period);
71 return HRTIMER_RESTART;
74 static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
76 u64 period = hrt.sample_period;
78 __hrtimer_start_range_ns(&cpu_ctx->hrtimer,
79 ns_to_ktime(period), 0,
80 HRTIMER_MODE_REL_PINNED, 0);
81 qm_debug_timer_start(NULL, period);
84 static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
86 hrtimer_cancel(&cpu_ctx->hrtimer);
87 qm_debug_timer_cancel();
90 static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
92 hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
93 cpu_ctx->hrtimer.function = hrtimer_handler;
96 static inline u64 get_posix_clock_monotonic_time(void)
100 do_posix_clock_monotonic_gettime(&ts);
101 return timespec_to_ns(&ts);
104 static inline u64 get_arch_time(struct timecounter *tc)
107 const struct cyclecounter *cc = tc->cc;
109 value = cc->read(cc);
110 return cyclecounter_cyc2ns(cc, value);
113 u64 quadd_get_time(void)
115 struct timecounter *tc = hrt.tc;
117 return (tc && hrt.use_arch_timer) ?
119 get_posix_clock_monotonic_time();
123 put_sample_cpu(struct quadd_record_data *data,
124 struct quadd_iovec *vec,
125 int vec_count, int cpu_id)
128 struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
130 err = comm->put_sample(data, vec, vec_count, cpu_id);
132 atomic64_inc(&hrt.skipped_samples);
134 atomic64_inc(&hrt.counter_samples);
/*
 * Convenience wrapper: emit a record that is not bound to a specific
 * CPU channel (cpu_id == -1).
 * (Reconstructed: restored the return type and braces dropped by the
 * mangled paste.)
 */
void
quadd_put_sample(struct quadd_record_data *data,
		 struct quadd_iovec *vec, int vec_count)
{
	put_sample_cpu(data, vec, vec_count, -1);
}
/*
 * Emit one QUADD_RECORD_TYPE_HEADER record describing the session
 * (parameters, feature flags, active counter events) to every possible
 * CPU's channel.
 * NOTE(review): garbled partial paste -- braces, the #else/#endif of the
 * debug-samples block, guards around the pmu/pl310 queries and the
 * vec.base assignment are missing from this excerpt.
 */
144 static void put_header(void)
147 int nr_events = 0, max_events = QUADD_MAX_COUNTERS;
148 int events[QUADD_MAX_COUNTERS];
149 struct quadd_record_data record;
150 struct quadd_header_data *hdr = &record.hdr;
151 struct quadd_parameters *param = &hrt.quadd_ctx->param;
/* Extra feature bits packed by user space into param->reserved. */
152 unsigned int extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
153 struct quadd_iovec vec;
154 struct quadd_ctx *ctx = hrt.quadd_ctx;
155 struct quadd_event_source_interface *pmu = ctx->pmu;
156 struct quadd_event_source_interface *pl310 = ctx->pl310;
158 record.record_type = QUADD_RECORD_TYPE_HEADER;
160 hdr->magic = QUADD_HEADER_MAGIC;
161 hdr->version = QUADD_SAMPLES_VERSION;
163 hdr->backtrace = param->backtrace;
164 hdr->use_freq = param->use_freq;
165 hdr->system_wide = param->system_wide;
167 /* TODO: dynamically */
168 #ifdef QM_DEBUG_SAMPLES_ENABLE
169 hdr->debug_samples = 1;
/* (#else branch in the original) */
171 hdr->debug_samples = 0;
174 hdr->freq = param->freq;
175 hdr->ma_freq = param->ma_freq;
176 hdr->power_rate_freq = param->power_rate_freq;
178 hdr->power_rate = hdr->power_rate_freq > 0 ? 1 : 0;
179 hdr->get_mmap = (extra & QUADD_PARAM_EXTRA_GET_MMAP) ? 1 : 0;
182 hdr->extra_length = 0;
/* Encode the selected unwind method into the header's reserved bits. */
184 hdr->reserved |= hrt.unw_method << QUADD_HDR_UNW_METHOD_SHIFT;
186 if (hrt.use_arch_timer)
187 hdr->reserved |= QUADD_HDR_USE_ARCH_TIMER;
189 if (hrt.get_stack_offset)
190 hdr->reserved |= QUADD_HDR_STACK_OFFSET;
/* Collect the active event ids from the PMU, then (if present) PL310. */
193 nr_events += pmu->get_current_events(events, max_events);
196 nr_events += pl310->get_current_events(events + nr_events,
197 max_events - nr_events);
199 hdr->nr_events = nr_events;
/* The event-id array is shipped as the record's single iovec. */
202 vec.len = nr_events * sizeof(events[0]);
204 for_each_possible_cpu(cpu_id)
205 put_sample_cpu(&record, &vec, 1, cpu_id);
/*
 * Emit one QUADD_RECORD_TYPE_SCHED record marking a context switch in
 * (is_sched_in != 0) or out of a profiled task.
 * NOTE(review): garbled partial paste -- the return-type line, braces
 * and several field assignments (pid/tgid/cpu, presumably) are missing.
 */
209 put_sched_sample(struct task_struct *task, int is_sched_in)
211 unsigned int cpu, flags;
212 struct quadd_record_data record;
213 struct quadd_sched_data *s = &record.sched;
215 record.record_type = QUADD_RECORD_TYPE_SCHED;
/* Also yields CPU-mode flags (LP cluster, etc.) via 'flags'. */
217 cpu = quadd_get_processor_id(NULL, &flags);
219 s->lp_mode = (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
221 s->sched_in = is_sched_in ? 1 : 0;
222 s->time = quadd_get_time();
230 quadd_put_sample(&record, NULL, 0);
/*
 * Fill the fixed part of a sample record (cpu mode flags, ip, time,
 * pid, interrupt context) from the trapped register state and task.
 * Returns 0 on success (non-zero paths are not visible here).
 * NOTE(review): garbled partial paste -- braces, the lvalue of the
 * lp_mode assignment and the body of the hide-kernel-IP branch are
 * missing from this excerpt.
 */
233 static int get_sample_data(struct quadd_sample_data *sample,
234 struct pt_regs *regs,
235 struct task_struct *task)
237 unsigned int cpu, flags;
238 struct quadd_ctx *quadd_ctx = hrt.quadd_ctx;
240 cpu = quadd_get_processor_id(regs, &flags);
/* (continuation of an assignment whose left side is not visible) */
244 (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
245 sample->thumb_mode = (flags & QUADD_CPUMODE_THUMB) ? 1 : 0;
246 sample->user_mode = user_mode(regs) ? 1 : 0;
248 /* For security reasons, hide IPs from the kernel space. */
249 if (!sample->user_mode && !quadd_ctx->collect_kernel_ips)
/* (else path) record the interrupted instruction pointer as-is. */
252 sample->ip = instruction_pointer(regs);
254 sample->time = quadd_get_time();
255 sample->reserved = 0;
256 sample->pid = task->pid;
257 sample->in_interrupt = in_interrupt() ? 1 : 0;
/*
 * Read up to max_events counters from one event source and store the
 * per-period deltas (current - previous raw value) into events_vals.
 * Returns the number of events read (return statement not visible).
 * NOTE(review): garbled partial paste -- braces, loop-local
 * declarations, the read of 'val' and the condition selecting the
 * wrap-around branch are missing from this excerpt.
 */
262 static int read_source(struct quadd_event_source_interface *source,
263 struct pt_regs *regs,
264 struct hrt_event_value *events_vals,
268 u32 prev_val, val, res_val;
269 struct event_data events[QUADD_MAX_COUNTERS];
/* Clamp to the fixed-size local buffer before asking the source. */
274 max_events = min_t(int, max_events, QUADD_MAX_COUNTERS);
275 nr_events = source->read(events, max_events);
277 for (i = 0; i < nr_events; i++) {
278 struct event_data *s = &events[i];
280 prev_val = s->prev_val;
/* Normal case: simple delta since the previous read. */
284 res_val = val - prev_val;
/* 32-bit counter wrapped since the previous read -- presumably the
 * guard is 'val < prev_val'; confirm against the upstream driver. */
286 res_val = QUADD_U32_MAX - prev_val + val;
288 if (s->event_source == QUADD_EVENT_SOURCE_PL310) {
/* PL310 (L2 cache) counters are shared: split the delta across all
 * actively sampled cores. */
289 int nr_active = atomic_read(&hrt.nr_active_all_core);
291 res_val /= nr_active;
294 events_vals[i].event_id = s->event_id;
295 events_vals[i].value = res_val;
/*
 * Distance (bytes) from the current user stack pointer to the end of
 * its VMA -- i.e. how deep the task currently is into its stack VMA.
 * Uses the deepest callchain SP when a backtrace was taken, otherwise
 * the trapped user SP.
 * NOTE(review): garbled partial paste -- the return-type line, braces,
 * the 'sp' declaration and the mm/vma NULL checks are missing here.
 */
302 get_stack_offset(struct task_struct *task,
303 struct pt_regs *regs,
304 struct quadd_callchain *cc)
307 struct vm_area_struct *vma;
308 struct mm_struct *mm = task->mm;
313 sp = cc->nr > 0 ? cc->curr_sp :
314 quadd_user_stack_pointer(regs);
316 vma = find_vma(mm, sp);
320 return vma->vm_end - sp;
/*
 * Core sampling routine: read the active event sources (PMU, optional
 * PL310), optionally capture a user-space callchain, assemble a
 * QUADD_RECORD_TYPE_SAMPLE record out of up to 5 iovecs (extra data,
 * callchain IPs, unwind types, event values, state) and emit it.
 * Called from the hrtimer handler (task == NULL -> current) and from
 * the sched-out path with an explicit task.
 * NOTE(review): garbled partial paste -- the return-type line, braces,
 * vec_idx increments, several guards and the epilogue are missing, so
 * read statement order with care.
 */
324 read_all_sources(struct pt_regs *regs, struct task_struct *task)
326 u32 state, extra_data = 0;
327 int i, vec_idx = 0, bt_size = 0;
328 int nr_events = 0, nr_positive_events = 0;
329 struct pt_regs *user_regs;
330 struct quadd_iovec vec[5];
331 struct hrt_event_value events[QUADD_MAX_COUNTERS];
332 u32 events_extra[QUADD_MAX_COUNTERS];
334 struct quadd_record_data record_data;
335 struct quadd_sample_data *s = &record_data.sample;
337 struct quadd_ctx *ctx = hrt.quadd_ctx;
338 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
339 struct quadd_callchain *cc = &cpu_ctx->cc;
/* Nothing being profiled on this CPU: bail out early. */
344 if (atomic_read(&cpu_ctx->nr_active) == 0)
/* Task is exiting (no nsproxy): skip sampling it. */
351 if (!task_nsproxy(task)) {
/* Gather counter deltas from each active source in turn. */
357 if (ctx->pmu && ctx->pmu_info.active)
358 nr_events += read_source(ctx->pmu, regs,
359 events, QUADD_MAX_COUNTERS);
361 if (ctx->pl310 && ctx->pl310_info.active)
362 nr_events += read_source(ctx->pl310, regs,
364 QUADD_MAX_COUNTERS - nr_events)&#59;
372 user_regs = current_pt_regs();
374 if (get_sample_data(s, regs, task))
/* iovec 0: the 32-bit extra_data flags word (filled in below). */
377 vec[vec_idx].base = &extra_data;
378 vec[vec_idx].len = sizeof(extra_data);
388 if (ctx->param.backtrace) {
389 cc->unw_method = hrt.unw_method;
390 bt_size = quadd_get_user_callchain(user_regs, cc, ctx, task);
/* Sample hit kernel mode and unwinding produced nothing: fall back
 * to storing the user-mode PC as a one-entry callchain. */
392 if (!bt_size && !user_mode(regs)) {
393 unsigned long pc = instruction_pointer(user_regs);
397 cc->cs_64 = compat_user_mode(user_regs) ? 0 : 1;
401 bt_size += quadd_callchain_store(cc, pc,
402 QUADD_UNW_TYPE_KCTX);
/* Callchain present: ship the IP array and the packed unwind-type
 * nibbles (2 per byte -> DIV_ROUND_UP(bt_size, 8) words). */
406 int ip_size = cc->cs_64 ? sizeof(u64) : sizeof(u32);
407 int nr_types = DIV_ROUND_UP(bt_size, 8);
409 vec[vec_idx].base = cc->cs_64 ?
410 (void *)cc->ip_64 : (void *)cc->ip_32;
411 vec[vec_idx].len = bt_size * ip_size;
414 vec[vec_idx].base = cc->types;
415 vec[vec_idx].len = nr_types * sizeof(cc->types[0]);
/* Flag 64-bit IP entries so the decoder picks the right width. */
419 extra_data |= QUADD_SED_IP64;
422 extra_data |= cc->unw_method << QUADD_SED_UNW_METHOD_SHIFT;
/* Record the unwinder's result code in the sample's reserved bits. */
423 s->reserved |= cc->unw_rc << QUADD_SAMPLE_URC_SHIFT;
425 s->callchain_nr = bt_size;
427 if (hrt.get_stack_offset) {
428 long offset = get_stack_offset(task, user_regs, cc);
/* Pack the offset as a 16-bit word-count into extra_data. */
430 u32 off = offset >> 2;
431 off = min_t(u32, off, 0xffff);
432 extra_data |= off << QUADD_SED_STACK_OFFSET_SHIFT;
436 record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
/* Keep only non-zero counter values; events_flags marks which. */
439 for (i = 0; i < nr_events; i++) {
440 u32 value = events[i].value;
442 s->events_flags |= 1 << i;
443 events_extra[nr_positive_events++] = value;
447 if (nr_positive_events == 0)
450 vec[vec_idx].base = events_extra;
451 vec[vec_idx].len = nr_positive_events * sizeof(events_extra[0]);
457 vec[vec_idx].base = &state;
458 vec[vec_idx].len = sizeof(state);
464 quadd_put_sample(&record_data, vec, vec_idx);
/*
 * True if 'task' belongs to the set of PIDs selected for profiling.
 * NOTE(review): garbled partial paste -- the return-type line, braces,
 * the assignment of 'pid' (presumably task->tgid or task->pid) and the
 * return statements are missing from this excerpt.
 */
468 is_profile_process(struct task_struct *task)
471 pid_t pid, profile_pid;
472 struct quadd_ctx *ctx = hrt.quadd_ctx;
/* Linear scan of the configured PID list. */
479 for (i = 0; i < ctx->param.nr_pids; i++) {
480 profile_pid = ctx->param.pids[i];
481 if (profile_pid == pid)
/*
 * Record 'pid'/'tgid' as the single active profiled thread on this CPU.
 * Warns (once) and refuses when a thread is already registered.
 * NOTE(review): garbled partial paste -- the return-type line, braces,
 * the success-path assignments and return values are missing.
 */
488 add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
490 struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
/* Slot already occupied or CPU already counted as active. */
492 if (t_data->pid > 0 ||
493 atomic_read(&cpu_ctx->nr_active) > 0) {
494 pr_warn_once("Warning for thread: %d\n", (int)pid);
/*
 * Clear this CPU's active-thread slot if it matches 'pid'.
 * Returns the number of removed threads (0 or 1); the return
 * statements and the slot-clearing code are not visible in this
 * garbled excerpt -- warns once when the pid does not match.
 */
503 static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
505 struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
510 if (t_data->pid == pid) {
/* Mismatch path: the registered thread is not the one leaving. */
516 pr_warn_once("Warning for thread: %d\n", (int)pid);
/*
 * Scheduler hook: a task is being switched in on this CPU. If it is a
 * profiled task, emit a sched-in record, register it as this CPU's
 * active thread and, on the 0 -> 1 transition, start the sampling
 * timer and bump the global active-core count.
 * NOTE(review): garbled partial paste -- braces, early returns and the
 * guard around the pl310 priming read are missing from this excerpt.
 */
520 void __quadd_task_sched_in(struct task_struct *prev,
521 struct task_struct *task)
523 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
524 struct quadd_ctx *ctx = hrt.quadd_ctx;
525 struct event_data events[QUADD_MAX_COUNTERS];
526 /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
/* Fast path: profiler not running. */
528 if (likely(!hrt.active))
/* (debug logging, normally compiled out with the state above) */
531 if (__ratelimit(&ratelimit_state))
532 pr_info("sch_in, cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n",
533 smp_processor_id(), (unsigned int)prev->pid,
534 (unsigned int)prev->tgid, (unsigned int)task->pid,
535 (unsigned int)task->tgid);
538 if (is_profile_process(task)) {
539 put_sched_sample(task, 1);
541 add_active_thread(cpu_ctx, task->pid, task->tgid);
542 atomic_inc(&cpu_ctx->nr_active);
/* First profiled thread on this CPU: begin sampling here. */
544 if (atomic_read(&cpu_ctx->nr_active) == 1) {
/* Prime the PL310 counters so the first delta is meaningful. */
549 ctx->pl310->read(events, 1);
551 start_hrtimer(cpu_ctx);
552 atomic_inc(&hrt.nr_active_all_core);
/*
 * Scheduler hook: a task is being switched out of this CPU. If it was
 * profiled, take one final sample from its saved user registers,
 * deregister it and, on the 1 -> 0 transition, cancel the sampling
 * timer, drop the global active-core count and emit a sched-out record.
 * NOTE(review): garbled partial paste -- braces, the 'n' declaration
 * and early returns are missing from this excerpt.
 */
557 void __quadd_task_sched_out(struct task_struct *prev,
558 struct task_struct *next)
561 struct pt_regs *user_regs;
562 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
563 struct quadd_ctx *ctx = hrt.quadd_ctx;
564 /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
/* Fast path: profiler not running. */
566 if (likely(!hrt.active))
/* (debug logging, normally compiled out with the state above) */
569 if (__ratelimit(&ratelimit_state))
570 pr_info("sch_out: cpu: %d, prev: %u (%u) \t--> next: %u (%u)\n",
571 smp_processor_id(), (unsigned int)prev->pid,
572 (unsigned int)prev->tgid, (unsigned int)next->pid,
573 (unsigned int)next->tgid);
576 if (is_profile_process(prev)) {
/* Sample the departing task's user-mode register snapshot. */
577 user_regs = task_pt_regs(prev);
579 read_all_sources(user_regs, prev);
581 n = remove_active_thread(cpu_ctx, prev->pid);
582 atomic_sub(n, &cpu_ctx->nr_active);
/* Last profiled thread left this CPU: stop sampling here. */
584 if (n && atomic_read(&cpu_ctx->nr_active) == 0) {
585 cancel_hrtimer(cpu_ctx);
586 atomic_dec(&hrt.nr_active_all_core);
592 put_sched_sample(prev, 0);
/*
 * mmap hook: when the profiler is active and 'current' is a profiled
 * task, forward the new mapping so the decoder can resolve addresses.
 * NOTE(review): garbled partial paste -- braces and the early 'return'
 * statements after the two guards are missing from this excerpt.
 */
596 void __quadd_event_mmap(struct vm_area_struct *vma)
598 struct quadd_parameters *param;
600 if (likely(!hrt.active))
603 if (!is_profile_process(current))
606 param = &hrt.quadd_ctx->param;
/* Mappings are attributed to the primary profiled PID (pids[0]). */
607 quadd_process_mmap(vma, param->pids[0]);
/*
 * Reset every CPU's profiling context: zero the active counter and
 * (in lines not visible here) clear the active-thread pid/tgid slots.
 * NOTE(review): garbled partial paste -- braces, the cpu_id declaration
 * and the t_data field resets are missing from this excerpt.
 */
610 static void reset_cpu_ctx(void)
613 struct quadd_cpu_context *cpu_ctx;
614 struct quadd_thread_data *t_data;
616 for (cpu_id = 0; cpu_id &lt; nr_cpu_ids; cpu_id++) {
617 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
618 t_data = &cpu_ctx->active_thread;
620 atomic_set(&cpu_ctx->nr_active, 0);
/*
 * Start a profiling session: derive the sampling period from the
 * requested frequency, reset counters, select the unwind method and
 * time source from the extra parameter bits, optionally snapshot the
 * target's current mappings, emit the header and start the
 * moving-average machinery.
 * Returns 0 on success (the return statements are not visible in this
 * garbled excerpt; braces and several guards are also missing).
 */
627 int quadd_hrt_start(void)
633 struct quadd_ctx *ctx = hrt.quadd_ctx;
634 struct quadd_parameters *param = &ctx->param;
/* Clamp the requested frequency to the supported minimum, then
 * convert to a period in nanoseconds. */
636 freq = ctx->param.freq;
637 freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
638 period = NSEC_PER_SEC / freq;
639 hrt.sample_period = period;
641 if (ctx->param.ma_freq > 0)
642 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
646 atomic64_set(&hrt.counter_samples, 0);
647 atomic64_set(&hrt.skipped_samples, 0);
651 extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
/* Unwind-method priority: mixed > exception tables > frame pointer. */
653 if (extra & QUADD_PARAM_EXTRA_BT_MIXED)
654 hrt.unw_method = QUADD_UNW_METHOD_MIXED;
655 else if (extra & QUADD_PARAM_EXTRA_BT_UNWIND_TABLES)
656 hrt.unw_method = QUADD_UNW_METHOD_EHT;
657 else if (extra & QUADD_PARAM_EXTRA_BT_FP)
658 hrt.unw_method = QUADD_UNW_METHOD_FP;
/* (else branch) no backtrace method requested. */
660 hrt.unw_method = QUADD_UNW_METHOD_NONE;
/* Arch timer is used only if both available (hrt.tc) and requested. */
662 if (hrt.tc && (extra & QUADD_PARAM_EXTRA_USE_ARCH_TIMER))
663 hrt.use_arch_timer = 1;
665 hrt.use_arch_timer = 0;
667 pr_info("timer: %s\n", hrt.use_arch_timer ? "arch" : "monotonic clock");
669 hrt.get_stack_offset =
670 (extra & QUADD_PARAM_EXTRA_STACK_OFFSET) ? 1 : 0;
/* Snapshot the target's existing mappings before sampling begins. */
674 if (extra & QUADD_PARAM_EXTRA_GET_MMAP) {
675 err = quadd_get_current_mmap(param->pids[0]);
677 pr_err("error: quadd_get_current_mmap\n");
685 quadd_ma_start(&hrt);
689 pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
/*
 * Stop the profiling session: log the final sample statistics and
 * reset the counters. (The lines clearing hrt.active and stopping the
 * moving-average machinery are not visible in this garbled excerpt.)
 */
693 void quadd_hrt_stop(void)
695 struct quadd_ctx *ctx = hrt.quadd_ctx;
697 pr_info("Stop hrt, samples all/skipped: %llu/%llu\n",
698 atomic64_read(&hrt.counter_samples),
699 atomic64_read(&hrt.skipped_samples));
708 atomic64_set(&hrt.counter_samples, 0);
709 atomic64_set(&hrt.skipped_samples, 0);
711 /* reset_cpu_ctx(); */
/*
 * Tear down the hrt module: free the per-CPU contexts allocated in
 * quadd_hrt_init(). (Lines between the signature and free_percpu --
 * presumably a stop-if-active guard -- are missing from this excerpt.)
 */
714 void quadd_hrt_deinit(void)
719 free_percpu(hrt.cpu_ctx);
722 void quadd_hrt_get_state(struct quadd_module_state *state)
724 state->nr_all_samples = atomic64_read(&hrt.counter_samples);
725 state->nr_skipped_samples = atomic64_read(&hrt.skipped_samples);
/*
 * Probe the ARM architected timer as a time source: adopt its
 * timecounter only when user space is allowed to read the virtual
 * counter (CNTVCT access enabled in CNTKCTL).
 * NOTE(review): garbled partial paste -- braces and possibly an 'else'
 * branch are missing between this function and the next.
 */
728 static void init_arch_timer(void)
730 u32 cntkctl = arch_timer_get_cntkctl();
732 if (cntkctl & ARCH_TIMER_USR_VCT_ACCESS_EN)
733 hrt.tc = arch_timer_get_timecounter();
/*
 * One-time module init: compute the initial sampling period, allocate
 * and zero the per-CPU contexts and set up each CPU's hrtimer.
 * Returns the global hrt context, or ERR_PTR(-ENOMEM) when the
 * per-CPU allocation fails.
 * NOTE(review): this excerpt is truncated -- the function continues
 * past the last visible line (final return, etc.) and several interior
 * lines (braces, storing ctx, init_arch_timer() call, the allocation-
 * failure guard) are missing; do not rely on the visible order alone.
 */
738 struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
743 struct quadd_cpu_context *cpu_ctx;
/* Same period derivation as quadd_hrt_start(): clamp freq, convert
 * to nanoseconds. */
748 freq = ctx->param.freq;
749 freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
750 period = NSEC_PER_SEC / freq;
751 hrt.sample_period = period;
753 if (ctx->param.ma_freq > 0)
754 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
758 atomic64_set(&hrt.counter_samples, 0);
761 hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
/* Allocation-failure path (guard line not visible). */
763 return ERR_PTR(-ENOMEM);
765 for_each_possible_cpu(cpu_id) {
766 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
768 atomic_set(&cpu_ctx->nr_active, 0);
/* -1 marks the active-thread slot as empty. */
770 cpu_ctx->active_thread.pid = -1;
771 cpu_ctx->active_thread.tgid = -1;
773 cpu_ctx->cc.hrt = &hrt;
775 init_hrtimer(cpu_ctx);