rtime.felk.cvut.cz Git - linux-imx.git/blobdiff - kernel/trace/trace_uprobe.c
uprobes/tracing: Make uprobe_{trace,perf}_print() uretprobe-friendly
[linux-imx.git] / kernel / trace / trace_uprobe.c
index c86e6d4f67fbfac4a81c6c58d7194afa7603f7e5..0ed99a27d122ad5f0995cb2b8bc9e4b780c290c7 100644 (file)
 
 #define UPROBE_EVENT_SYSTEM    "uprobes"
 
+struct uprobe_trace_entry_head { /* fixed head of one uprobe trace record */
+       struct trace_entry      ent;
+       unsigned long           vaddr[]; /* [0] = func or ip; [1] = ip (return probes) */
+};
+
+/*
+ * Size of the fixed part of a uprobe trace record: the entry head plus one
+ * vaddr slot for an entry probe, or two slots for a return probe.
+ * @is_return is parenthesized so that any expression can be passed safely.
+ */
+#define SIZEOF_TRACE_ENTRY(is_return)                  \
+       (sizeof(struct uprobe_trace_entry_head) +       \
+        sizeof(unsigned long) * ((is_return) ? 2 : 1))
+
+/* Address just past the vaddr slots, where the fetched arguments are stored. */
+#define DATAOF_TRACE_ENTRY(entry, is_return)           \
+       ((void *)(entry) + SIZEOF_TRACE_ENTRY(is_return))
+
+struct trace_uprobe_filter { /* decides which mms a probe's perf events cover */
+       rwlock_t                rwlock; /* taken around accesses to the fields below */
+       int                     nr_systemwide; /* attached events without hw.tp_target */
+       struct list_head        perf_events; /* events with a target, via hw.tp_list */
+};
+
 /*
  * uprobe event core functions
  */
-struct trace_uprobe;
-struct uprobe_trace_consumer {
-       struct uprobe_consumer          cons;
-       struct trace_uprobe             *tu;
-};
-
 struct trace_uprobe {
        struct list_head                list;
        struct ftrace_event_class       class;
        struct ftrace_event_call        call;
-       struct uprobe_trace_consumer    *consumer;
+       struct trace_uprobe_filter      filter;
+       struct uprobe_consumer          consumer;
        struct inode                    *inode;
        char                            *filename;
        unsigned long                   offset;
@@ -63,12 +76,31 @@ static DEFINE_MUTEX(uprobe_lock);
 static LIST_HEAD(uprobe_list);
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+                               unsigned long func, struct pt_regs *regs);
+
+static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
+{ /* put @filter into the empty state: lock initialized, no events attached */
+       rwlock_init(&filter->rwlock);
+       filter->nr_systemwide = 0;
+       INIT_LIST_HEAD(&filter->perf_events);
+}
+
+static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
+{ /* true when neither a system-wide nor a per-target event is attached */
+       return !filter->nr_systemwide && list_empty(&filter->perf_events);
+}
+
+static inline bool is_ret_probe(struct trace_uprobe *tu) /* is @tu a return probe? */
+{
+       return tu->consumer.ret_handler != NULL; /* ret_handler is set only when is_ret */
+}
 
 /*
  * Allocate new trace_uprobe and initialize it (including uprobes).
  */
 static struct trace_uprobe *
-alloc_trace_uprobe(const char *group, const char *event, int nargs)
+alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
 {
        struct trace_uprobe *tu;
 
@@ -92,6 +124,10 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
                goto error;
 
        INIT_LIST_HEAD(&tu->list);
+       tu->consumer.handler = uprobe_dispatcher;
+       if (is_ret)
+               tu->consumer.ret_handler = uretprobe_dispatcher;
+       init_trace_uprobe_filter(&tu->filter);
        return tu;
 
 error:
@@ -253,12 +289,18 @@ static int create_trace_uprobe(int argc, char **argv)
        if (ret)
                goto fail_address_parse;
 
+       inode = igrab(path.dentry->d_inode);
+       path_put(&path);
+
+       if (!inode || !S_ISREG(inode->i_mode)) {
+               ret = -EINVAL;
+               goto fail_address_parse;
+       }
+
        ret = kstrtoul(arg, 0, &offset);
        if (ret)
                goto fail_address_parse;
 
-       inode = igrab(path.dentry->d_inode);
-
        argc -= 2;
        argv += 2;
 
@@ -282,7 +324,7 @@ static int create_trace_uprobe(int argc, char **argv)
                kfree(tail);
        }
 
-       tu = alloc_trace_uprobe(group, event, argc);
+       tu = alloc_trace_uprobe(group, event, argc, false);
        if (IS_ERR(tu)) {
                pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
                ret = PTR_ERR(tu);
@@ -356,7 +398,7 @@ fail_address_parse:
        if (inode)
                iput(inode);
 
-       pr_info("Failed to parse address.\n");
+       pr_info("Failed to parse address or file.\n");
 
        return ret;
 }
@@ -464,65 +506,73 @@ static const struct file_operations uprobe_profile_ops = {
        .release        = seq_release,
 };
 
-/* uprobe handler */
-static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_trace_print(struct trace_uprobe *tu,
+                               unsigned long func, struct pt_regs *regs)
 { /* reserve, fill and commit one trace record for a hit on @tu */
        struct uprobe_trace_entry_head *entry;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
-       u8 *data;
-       int size, i, pc;
-       unsigned long irq_flags;
+       void *data;
+       int size, i;
        struct ftrace_event_call *call = &tu->call;
 
-       tu->nhit++;
-
-       local_save_flags(irq_flags);
-       pc = preempt_count();
-
-       size = sizeof(*entry) + tu->size;
-
+       size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); /* fixed part; tu->size is added below */
        event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
-                                                 size, irq_flags, pc);
+                                                 size + tu->size, 0, 0);
        if (!event)
                return; /* reservation failed, nothing to record */
 
        entry = ring_buffer_event_data(event);
-       entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
-       data = (u8 *)&entry[1];
+       if (is_ret_probe(tu)) { /* return probe: two slots, func then ip */
+               entry->vaddr[0] = func;
+               entry->vaddr[1] = instruction_pointer(regs);
+               data = DATAOF_TRACE_ENTRY(entry, true);
+       } else { /* entry probe: a single ip slot, @func is unused */
+               entry->vaddr[0] = instruction_pointer(regs);
+               data = DATAOF_TRACE_ENTRY(entry, false);
+       }
+
        for (i = 0; i < tu->nr_args; i++) /* store each fetched argument at its offset */
                call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
        if (!filter_current_check_discard(buffer, call, entry, event))
-               trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+               trace_buffer_unlock_commit(buffer, event, 0, 0); /* commit unless discarded */
+}
+
+/* uprobe handler: trace the entry hit unless @tu is a return probe; returns 0 */
+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+       if (!is_ret_probe(tu))
+               uprobe_trace_print(tu, 0, regs); /* func is 0 for entry probes */
+       return 0;
+}
+
+static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
+                               struct pt_regs *regs)
+{ /* return-probe trace handler: records unconditionally, passing @func on */
+       uprobe_trace_print(tu, func, regs);
 }
 
 /* Event entry printers */
 static enum print_line_t
 print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
 {
-       struct uprobe_trace_entry_head *field;
+       struct uprobe_trace_entry_head *entry;
        struct trace_seq *s = &iter->seq;
        struct trace_uprobe *tu;
        u8 *data;
        int i;
 
-       field = (struct uprobe_trace_entry_head *)iter->ent;
+       entry = (struct uprobe_trace_entry_head *)iter->ent;
        tu = container_of(event, struct trace_uprobe, call.event);
 
-       if (!trace_seq_printf(s, "%s: (", tu->call.name))
-               goto partial;
-
-       if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
+       if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, entry->vaddr[0]))
                goto partial;
 
-       if (!trace_seq_puts(s, ")"))
-               goto partial;
-
-       data = (u8 *)&field[1];
+       data = DATAOF_TRACE_ENTRY(entry, false);
        for (i = 0; i < tu->nr_args; i++) {
                if (!tu->args[i].type->print(s, tu->args[i].name,
-                                            data + tu->args[i].offset, field))
+                                            data + tu->args[i].offset, entry))
                        goto partial;
        }
 
@@ -533,56 +583,58 @@ partial:
        return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static int probe_event_enable(struct trace_uprobe *tu, int flag)
+static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
 {
-       struct uprobe_trace_consumer *utc;
-       int ret = 0;
+       return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE); /* trace or perf flag set */
+}
 
-       if (!tu->inode || tu->consumer)
-               return -EINTR;
+typedef bool (*filter_func_t)(struct uprobe_consumer *self,
+                               enum uprobe_filter_ctx ctx,
+                               struct mm_struct *mm); /* stored in uprobe_consumer.filter */
+
+static int
+probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
+{ /* set @flag, install @filter and register the consumer; 0 or an error code */
+       int ret = 0;
 
-       utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
-       if (!utc)
+       if (is_trace_uprobe_enabled(tu)) /* a trace or perf flag is already set */
                return -EINTR;
 
-       utc->cons.handler = uprobe_dispatcher;
-       utc->cons.filter = NULL;
-       ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
-       if (ret) {
-               kfree(utc);
-               return ret;
-       }
+       WARN_ON(!uprobe_filter_is_empty(&tu->filter));
 
        tu->flags |= flag; /* set before registering; cleared again on failure */
-       utc->tu = tu;
-       tu->consumer = utc;
+       tu->consumer.filter = filter;
+       ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+       if (ret)
+               tu->flags &= ~flag; /* registration failed: roll the flag back */
 
-       return 0;
+       return ret;
 }
 
 static void probe_event_disable(struct trace_uprobe *tu, int flag)
 { /* unregister the consumer and clear @flag; no-op when not enabled */
-       if (!tu->inode || !tu->consumer)
+       if (!is_trace_uprobe_enabled(tu))
                return;
 
-       uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
+       WARN_ON(!uprobe_filter_is_empty(&tu->filter));
+
+       uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
        tu->flags &= ~flag;
-       kfree(tu->consumer);
-       tu->consumer = NULL;
 }
 
 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
 {
-       int ret, i;
+       int ret, i, size;
        struct uprobe_trace_entry_head field;
-       struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
+       struct trace_uprobe *tu = event_call->data;
 
-       DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
+       DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
+       size = SIZEOF_TRACE_ENTRY(false);
        /* Set argument names as fields */
        for (i = 0; i < tu->nr_args; i++) {
                ret = trace_define_field(event_call, tu->args[i].type->fmttype,
                                         tu->args[i].name,
-                                        sizeof(field) + tu->args[i].offset,
+                                        size + tu->args[i].offset,
                                         tu->args[i].type->size,
                                         tu->args[i].type->is_signed,
                                         FILTER_OTHER);
@@ -642,49 +694,159 @@ static int set_print_fmt(struct trace_uprobe *tu)
 }
 
 #ifdef CONFIG_PERF_EVENTS
-/* uprobe profile handler */
-static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static bool
+__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
+{ /* does @filter accept @mm? the callers in this file hold filter->rwlock */
+       struct perf_event *event;
+
+       if (filter->nr_systemwide) /* any system-wide event accepts every mm */
+               return true;
+
+       list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
+               if (event->hw.tp_target->mm == mm) /* an event targets this mm */
+                       return true;
+       }
+
+       return false;
+}
+
+static inline bool
+uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
+{ /* is the mm of @event's target accepted by @tu's filter? */
+       return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
+}
+
+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
+{ /* account @event in @tu's filter, then uprobe_apply(true) if needed; returns 0 */
+       bool done; /* true when the uprobe_apply() call can be skipped */
+
+       write_lock(&tu->filter.rwlock);
+       if (event->hw.tp_target) {
+               /*
+                * event->parent != NULL means copy_process(), we can avoid
+                * uprobe_apply(). current->mm must be probed and we can rely
+                * on dup_mmap() which preserves the already installed bp's.
+                *
+                * attr.enable_on_exec means that exec/mmap will install the
+                * breakpoints we need.
+                */
+               done = tu->filter.nr_systemwide ||
+                       event->parent || event->attr.enable_on_exec ||
+                       uprobe_filter_event(tu, event);
+               list_add(&event->hw.tp_list, &tu->filter.perf_events); /* added after the test above */
+       } else {
+               done = tu->filter.nr_systemwide; /* nonzero if another such event exists */
+               tu->filter.nr_systemwide++;
+       }
+       write_unlock(&tu->filter.rwlock);
+
+       if (!done)
+               uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); /* result not checked */
+
+       return 0;
+}
+
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+{ /* remove @event from @tu's filter, then uprobe_apply(false) if needed; returns 0 */
+       bool done; /* true when the uprobe_apply() call can be skipped */
+
+       write_lock(&tu->filter.rwlock);
+       if (event->hw.tp_target) {
+               list_del(&event->hw.tp_list); /* unlink first so the test below ignores @event */
+               done = tu->filter.nr_systemwide ||
+                       (event->hw.tp_target->flags & PF_EXITING) ||
+                       uprobe_filter_event(tu, event);
+       } else {
+               tu->filter.nr_systemwide--;
+               done = tu->filter.nr_systemwide; /* nonzero if such events remain */
+       }
+       write_unlock(&tu->filter.rwlock);
+
+       if (!done)
+               uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); /* result not checked */
+
+       return 0;
+}
+
+static bool uprobe_perf_filter(struct uprobe_consumer *uc,
+                               enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{ /* filter callback: __uprobe_perf_filter() under the read lock; @ctx is unused */
+       struct trace_uprobe *tu;
+       int ret;
+
+       tu = container_of(uc, struct trace_uprobe, consumer); /* @uc is embedded in the tu */
+       read_lock(&tu->filter.rwlock);
+       ret = __uprobe_perf_filter(&tu->filter, mm);
+       read_unlock(&tu->filter.rwlock);
+
+       return ret;
+}
+
+static void uprobe_perf_print(struct trace_uprobe *tu,
+                               unsigned long func, struct pt_regs *regs)
 { /* prepare, fill and submit one perf trace record for a hit on @tu */
        struct ftrace_event_call *call = &tu->call;
        struct uprobe_trace_entry_head *entry;
        struct hlist_head *head;
-       u8 *data;
-       int size, __size, i;
-       int rctx;
+       unsigned long ip;
+       void *data;
+       int size, rctx, i;
 
-       __size = sizeof(*entry) + tu->size;
-       size = ALIGN(__size + sizeof(u32), sizeof(u64));
-       size -= sizeof(u32);
+       size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+       size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); /* size + u32 is u64-aligned */
        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
                return;
 
        preempt_disable();
-
        entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
        if (!entry)
                goto out;
 
-       entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
-       data = (u8 *)&entry[1];
+       ip = instruction_pointer(regs);
+       if (is_ret_probe(tu)) { /* return probe: two slots, func then ip */
+               entry->vaddr[0] = func;
+               entry->vaddr[1] = ip;
+               data = DATAOF_TRACE_ENTRY(entry, true);
+       } else { /* entry probe: a single ip slot, @func is unused */
+               entry->vaddr[0] = ip;
+               data = DATAOF_TRACE_ENTRY(entry, false);
+       }
+
        for (i = 0; i < tu->nr_args; i++) /* store each fetched argument at its offset */
                call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
        head = this_cpu_ptr(call->perf_events);
-       perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
-
+       perf_trace_buf_submit(entry, size, rctx, ip, 1, regs, head, NULL);
  out:
        preempt_enable(); /* pairs with preempt_disable() above on both paths */
 }
+
+/* uprobe profile handler: UPROBE_HANDLER_REMOVE if current->mm is filtered out, else 0 */
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+       if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+               return UPROBE_HANDLER_REMOVE; /* the filter rejects current->mm */
+
+       if (!is_ret_probe(tu))
+               uprobe_perf_print(tu, 0, regs); /* func is 0 for entry probes */
+       return 0;
+}
+
+static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
+                               struct pt_regs *regs)
+{ /* return-probe perf handler: records without consulting the filter */
+       uprobe_perf_print(tu, func, regs);
+}
 #endif /* CONFIG_PERF_EVENTS */
 
 static
 int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
 {
-       struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
+       struct trace_uprobe *tu = event->data;
 
        switch (type) {
        case TRACE_REG_REGISTER:
-               return probe_event_enable(tu, TP_FLAG_TRACE);
+               return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
 
        case TRACE_REG_UNREGISTER:
                probe_event_disable(tu, TP_FLAG_TRACE);
@@ -692,11 +854,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 #ifdef CONFIG_PERF_EVENTS
        case TRACE_REG_PERF_REGISTER:
-               return probe_event_enable(tu, TP_FLAG_PROFILE);
+               return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
 
        case TRACE_REG_PERF_UNREGISTER:
                probe_event_disable(tu, TP_FLAG_PROFILE);
                return 0;
+
+       case TRACE_REG_PERF_OPEN:
+               return uprobe_perf_open(tu, data);
+
+       case TRACE_REG_PERF_CLOSE:
+               return uprobe_perf_close(tu, data);
+
 #endif
        default:
                return 0;
@@ -706,20 +875,35 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 { /* entry handler: count the hit, run the enabled handlers, OR their results */
-       struct uprobe_trace_consumer *utc;
        struct trace_uprobe *tu;
+       int ret = 0;
 
-       utc = container_of(con, struct uprobe_trace_consumer, cons);
-       tu = utc->tu;
-       if (!tu || tu->consumer != utc)
-               return 0;
+       tu = container_of(con, struct trace_uprobe, consumer); /* @con is embedded in the tu */
+       tu->nhit++;
+
+       if (tu->flags & TP_FLAG_TRACE)
+               ret |= uprobe_trace_func(tu, regs);
+
+#ifdef CONFIG_PERF_EVENTS
+       if (tu->flags & TP_FLAG_PROFILE)
+               ret |= uprobe_perf_func(tu, regs); /* may add UPROBE_HANDLER_REMOVE */
+#endif
+       return ret;
+}
+
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+                               unsigned long func, struct pt_regs *regs)
+{ /* return handler: run the enabled return handlers; always returns 0 */
+       struct trace_uprobe *tu;
+
+       tu = container_of(con, struct trace_uprobe, consumer); /* @con is embedded in the tu */
 
        if (tu->flags & TP_FLAG_TRACE)
-               uprobe_trace_func(tu, regs);
+               uretprobe_trace_func(tu, func, regs);
 
 #ifdef CONFIG_PERF_EVENTS
        if (tu->flags & TP_FLAG_PROFILE)
-               uprobe_perf_func(tu, regs);
+               uretprobe_perf_func(tu, func, regs);
 #endif
        return 0;
 }