#define UPROBE_EVENT_SYSTEM "uprobes"
+struct uprobe_trace_entry_head { /* fixed head of one recorded uprobe event */
+ struct trace_entry ent;
+ unsigned long vaddr[]; /* 1 slot for an entry probe, 2 (func, ip) for a return probe; see SIZEOF_TRACE_ENTRY */
+};
+
+/*
+ * Size of the fixed part of a record: the entry head plus one vaddr slot
+ * for an entry probe, or two slots for a return probe.  The argument is
+ * parenthesized so that any expression can be passed safely.
+ */
+#define SIZEOF_TRACE_ENTRY(is_return)			\
+	(sizeof(struct uprobe_trace_entry_head) +	\
+	 sizeof(unsigned long) * ((is_return) ? 2 : 1))
+
+/* Start of the data area that follows the head and its vaddr slots. */
+#define DATAOF_TRACE_ENTRY(entry, is_return)		\
+	((void *)(entry) + SIZEOF_TRACE_ENTRY(is_return))
+
+struct trace_uprobe_filter { /* perf events currently attached to one trace_uprobe */
+ rwlock_t rwlock; /* write-locked by uprobe_perf_open/close, read-locked by uprobe_perf_filter */
+ int nr_systemwide; /* number of attached perf events that have no hw.tp_target */
+ struct list_head perf_events; /* attached perf events that have a target, linked via hw.tp_list */
+};
+
/*
* uprobe event core functions
*/
-struct trace_uprobe;
-struct uprobe_trace_consumer {
- struct uprobe_consumer cons;
- struct trace_uprobe *tu;
-};
-
struct trace_uprobe {
struct list_head list;
struct ftrace_event_class class;
struct ftrace_event_call call;
- struct uprobe_trace_consumer *consumer;
+ struct trace_uprobe_filter filter;
+ struct uprobe_consumer consumer;
struct inode *inode;
char *filename;
unsigned long offset;
static LIST_HEAD(uprobe_list);
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+ unsigned long func, struct pt_regs *regs);
+
+/* Put @filter into its empty state: no targeted events, no system-wide users. */
+static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
+{
+	INIT_LIST_HEAD(&filter->perf_events);
+	filter->nr_systemwide = 0;
+	rwlock_init(&filter->rwlock);
+}
+
+/* True when @filter has neither system-wide users nor listed perf events. */
+static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
+{
+	if (filter->nr_systemwide)
+		return false;
+
+	return list_empty(&filter->perf_events);
+}
+
+/* A trace_uprobe is a return probe iff its consumer has a ret_handler set. */
+static inline bool is_ret_probe(struct trace_uprobe *tu)
+{
+	return !!tu->consumer.ret_handler;
+}
/*
* Allocate new trace_uprobe and initialize it (including uprobes).
*/
static struct trace_uprobe *
-alloc_trace_uprobe(const char *group, const char *event, int nargs)
+alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
{
struct trace_uprobe *tu;
goto error;
INIT_LIST_HEAD(&tu->list);
+ tu->consumer.handler = uprobe_dispatcher;
+ if (is_ret)
+ tu->consumer.ret_handler = uretprobe_dispatcher;
+ init_trace_uprobe_filter(&tu->filter);
return tu;
error:
if (ret)
goto fail_address_parse;
+ inode = igrab(path.dentry->d_inode);
+ path_put(&path);
+
+ if (!inode || !S_ISREG(inode->i_mode)) {
+ ret = -EINVAL;
+ goto fail_address_parse;
+ }
+
ret = kstrtoul(arg, 0, &offset);
if (ret)
goto fail_address_parse;
- inode = igrab(path.dentry->d_inode);
-
argc -= 2;
argv += 2;
kfree(tail);
}
- tu = alloc_trace_uprobe(group, event, argc);
+ tu = alloc_trace_uprobe(group, event, argc, false);
if (IS_ERR(tu)) {
pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
ret = PTR_ERR(tu);
if (inode)
iput(inode);
- pr_info("Failed to parse address.\n");
+ pr_info("Failed to parse address or file.\n");
return ret;
}
.release = seq_release,
};
-/* uprobe handler */
-static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_trace_print(struct trace_uprobe *tu,
+ unsigned long func, struct pt_regs *regs) /* func is stored only when tu is a return probe */
{
struct uprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
- u8 *data;
- int size, i, pc;
- unsigned long irq_flags;
+ void *data;
+ int size, i;
struct ftrace_event_call *call = &tu->call;
- tu->nhit++;
-
- local_save_flags(irq_flags);
- pc = preempt_count();
-
- size = sizeof(*entry) + tu->size;
-
+ size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); /* head plus 1 or 2 vaddr slots */
event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
- size, irq_flags, pc);
+ size + tu->size, 0, 0); /* tu->size extra bytes are reserved after the head */
if (!event)
return;
entry = ring_buffer_event_data(event);
- entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
- data = (u8 *)&entry[1];
+ if (is_ret_probe(tu)) {
+ entry->vaddr[0] = func;
+ entry->vaddr[1] = instruction_pointer(regs);
+ data = DATAOF_TRACE_ENTRY(entry, true); /* data starts after both vaddr slots */
+ } else {
+ entry->vaddr[0] = instruction_pointer(regs);
+ data = DATAOF_TRACE_ENTRY(entry, false); /* data starts after the single vaddr slot */
+ }
+
for (i = 0; i < tu->nr_args; i++)
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
if (!filter_current_check_discard(buffer, call, entry, event))
- trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+ trace_buffer_unlock_commit(buffer, event, 0, 0); /* flags and preempt count are passed as 0, matching the reserve call */
+}
+
+/*
+ * uprobe handler: record the hit unless @tu is a return probe, in which
+ * case nothing is recorded here.  Always returns 0.
+ */
+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+	if (is_ret_probe(tu))
+		return 0;
+
+	uprobe_trace_print(tu, 0, regs);
+	return 0;
+}
+
+static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
+ struct pt_regs *regs)
+{
+ uprobe_trace_print(tu, func, regs); /* same record path as uprobe_trace_func, with func supplied */
}
/* Event entry printers */
static enum print_line_t
print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
{
- struct uprobe_trace_entry_head *field;
+ struct uprobe_trace_entry_head *entry;
struct trace_seq *s = &iter->seq;
struct trace_uprobe *tu;
u8 *data;
int i;
- field = (struct uprobe_trace_entry_head *)iter->ent;
+ entry = (struct uprobe_trace_entry_head *)iter->ent;
tu = container_of(event, struct trace_uprobe, call.event);
- if (!trace_seq_printf(s, "%s: (", tu->call.name))
- goto partial;
-
- if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
+ if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, entry->vaddr[0]))
goto partial;
- if (!trace_seq_puts(s, ")"))
- goto partial;
-
- data = (u8 *)&field[1];
+ data = DATAOF_TRACE_ENTRY(entry, false);
for (i = 0; i < tu->nr_args; i++) {
if (!tu->args[i].type->print(s, tu->args[i].name,
- data + tu->args[i].offset, field))
+ data + tu->args[i].offset, entry))
goto partial;
}
return TRACE_TYPE_PARTIAL_LINE;
}
-static int probe_event_enable(struct trace_uprobe *tu, int flag)
+static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
{
- struct uprobe_trace_consumer *utc;
- int ret = 0;
+ return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE); /* either flag set by probe_event_enable() counts as enabled */
+}
- if (!tu->inode || tu->consumer)
- return -EINTR;
+typedef bool (*filter_func_t)(struct uprobe_consumer *self,
+ enum uprobe_filter_ctx ctx,
+ struct mm_struct *mm); /* type of the callback probe_event_enable() stores in tu->consumer.filter */
+
+static int
+probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
+{
+ int ret = 0;
- utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
- if (!utc)
+ if (is_trace_uprobe_enabled(tu)) /* refuse when TP_FLAG_TRACE or TP_FLAG_PROFILE is already set */
return -EINTR;
- utc->cons.handler = uprobe_dispatcher;
- utc->cons.filter = NULL;
- ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
- if (ret) {
- kfree(utc);
- return ret;
- }
+ WARN_ON(!uprobe_filter_is_empty(&tu->filter)); /* a disabled probe is expected to have no attached perf events */
tu->flags |= flag;
- utc->tu = tu;
- tu->consumer = utc;
+ tu->consumer.filter = filter; /* installed before the consumer is registered */
+ ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+ if (ret)
+ tu->flags &= ~flag; /* registration failed: undo the flag set above */
- return 0;
+ return ret;
}
static void probe_event_disable(struct trace_uprobe *tu, int flag)
{
- if (!tu->inode || !tu->consumer)
+ if (!is_trace_uprobe_enabled(tu)) /* nothing to do when neither flag is set */
return;
- uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
+ WARN_ON(!uprobe_filter_is_empty(&tu->filter)); /* warn if perf events are still attached at disable time */
+
+ uprobe_unregister(tu->inode, tu->offset, &tu->consumer); /* consumer is embedded in tu, so nothing is freed afterwards */
tu->flags &= ~flag;
- kfree(tu->consumer);
- tu->consumer = NULL;
}
static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
{
- int ret, i;
+ int ret, i, size;
struct uprobe_trace_entry_head field;
- struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
+ struct trace_uprobe *tu = event_call->data;
- DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
+ DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
+ size = SIZEOF_TRACE_ENTRY(false);
/* Set argument names as fields */
for (i = 0; i < tu->nr_args; i++) {
ret = trace_define_field(event_call, tu->args[i].type->fmttype,
tu->args[i].name,
- sizeof(field) + tu->args[i].offset,
+ size + tu->args[i].offset,
tu->args[i].type->size,
tu->args[i].type->is_signed,
FILTER_OTHER);
}
#ifdef CONFIG_PERF_EVENTS
-/* uprobe profile handler */
-static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+/*
+ * Does @filter cover @mm?  It does when at least one system-wide event is
+ * attached, or when some listed perf event targets a task whose mm is @mm.
+ * The callers in this file hold filter->rwlock around the call.
+ */
+static bool
+__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
+{
+	struct perf_event *pos;
+	bool covered = false;
+
+	if (filter->nr_systemwide)
+		return true;
+
+	list_for_each_entry(pos, &filter->perf_events, hw.tp_list) {
+		if (pos->hw.tp_target->mm == mm) {
+			covered = true;
+			break;
+		}
+	}
+
+	return covered;
+}
+
+/* Is the mm of @event's target task already covered by @tu's filter? */
+static inline bool
+uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
+{
+	struct mm_struct *mm = event->hw.tp_target->mm;
+
+	return __uprobe_perf_filter(&tu->filter, mm);
+}
+
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event);
+
+/*
+ * Attach @event to @tu's filter and, unless the breakpoints it needs are
+ * already (or will be) installed, apply the probe via uprobe_apply().
+ *
+ * Returns 0 on success.  If uprobe_apply() fails, the event is detached
+ * again with uprobe_perf_close() and the error is returned, so a failed
+ * open is never reported as success.
+ */
+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
+{
+	bool done;
+	int err = 0;
+
+	write_lock(&tu->filter.rwlock);
+	if (event->hw.tp_target) {
+		/*
+		 * event->parent != NULL means copy_process(), we can avoid
+		 * uprobe_apply(). current->mm must be probed and we can rely
+		 * on dup_mmap() which preserves the already installed bp's.
+		 *
+		 * attr.enable_on_exec means that exec/mmap will install the
+		 * breakpoints we need.
+		 */
+		done = tu->filter.nr_systemwide ||
+			event->parent || event->attr.enable_on_exec ||
+			uprobe_filter_event(tu, event);
+		list_add(&event->hw.tp_list, &tu->filter.perf_events);
+	} else {
+		/* only the first system-wide user needs to apply the probe */
+		done = tu->filter.nr_systemwide;
+		tu->filter.nr_systemwide++;
+	}
+	write_unlock(&tu->filter.rwlock);
+
+	if (!done) {
+		err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+		if (err)
+			uprobe_perf_close(tu, event);
+	}
+
+	return err;
+}
+
+/*
+ * Detach @event from @tu's filter.  uprobe_apply(..., false) is called
+ * unless a system-wide user remains, the target task has PF_EXITING set,
+ * or another attached event still covers the target's mm.  Always
+ * returns 0.
+ */
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+{
+	struct trace_uprobe_filter *filter = &tu->filter;
+	bool skip_apply;
+
+	write_lock(&filter->rwlock);
+	if (!event->hw.tp_target) {
+		filter->nr_systemwide--;
+		skip_apply = filter->nr_systemwide;
+	} else {
+		/* unlink first, so the list walk below no longer sees @event */
+		list_del(&event->hw.tp_list);
+		skip_apply = filter->nr_systemwide ||
+			     (event->hw.tp_target->flags & PF_EXITING) ||
+			     uprobe_filter_event(tu, event);
+	}
+	write_unlock(&filter->rwlock);
+
+	if (!skip_apply)
+		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
+
+	return 0;
+}
+
+/*
+ * Filter callback for the embedded uprobe_consumer: report whether @mm is
+ * covered by the owning trace_uprobe's filter.  The filter is examined
+ * under the read side of its rwlock.  @ctx is not used.
+ */
+static bool uprobe_perf_filter(struct uprobe_consumer *uc,
+				enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{
+	struct trace_uprobe *tu = container_of(uc, struct trace_uprobe, consumer);
+	bool covered;
+
+	read_lock(&tu->filter.rwlock);
+	covered = __uprobe_perf_filter(&tu->filter, mm);
+	read_unlock(&tu->filter.rwlock);
+
+	return covered;
+}
+
+static void uprobe_perf_print(struct trace_uprobe *tu,
+ unsigned long func, struct pt_regs *regs) /* func is stored only when tu is a return probe */
{
struct ftrace_event_call *call = &tu->call;
struct uprobe_trace_entry_head *entry;
struct hlist_head *head;
- u8 *data;
- int size, __size, i;
- int rctx;
+ unsigned long ip;
+ void *data;
+ int size, rctx, i;
- __size = sizeof(*entry) + tu->size;
- size = ALIGN(__size + sizeof(u32), sizeof(u64));
- size -= sizeof(u32);
+ size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); /* head plus 1 or 2 vaddr slots */
+ size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); /* makes size + sizeof(u32) a multiple of sizeof(u64) */
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
return;
preempt_disable();
-
entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
if (!entry)
goto out;
- entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
- data = (u8 *)&entry[1];
+ ip = instruction_pointer(regs); /* kept in a local: it is both recorded and passed to the submit call */
+ if (is_ret_probe(tu)) {
+ entry->vaddr[0] = func;
+ entry->vaddr[1] = ip;
+ data = DATAOF_TRACE_ENTRY(entry, true); /* data starts after both vaddr slots */
+ } else {
+ entry->vaddr[0] = ip;
+ data = DATAOF_TRACE_ENTRY(entry, false); /* data starts after the single vaddr slot */
+ }
+
for (i = 0; i < tu->nr_args; i++)
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
head = this_cpu_ptr(call->perf_events);
- perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
-
+ perf_trace_buf_submit(entry, size, rctx, ip, 1, regs, head, NULL);
out:
preempt_enable();
}
+
+/*
+ * uprobe profile handler: return UPROBE_HANDLER_REMOVE when current->mm is
+ * not covered by the perf filter.  Otherwise record the hit, unless @tu is
+ * a return probe, and return 0.
+ */
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+		return UPROBE_HANDLER_REMOVE;
+
+	if (is_ret_probe(tu))
+		return 0;
+
+	uprobe_perf_print(tu, 0, regs);
+	return 0;
+}
+
+static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
+ struct pt_regs *regs)
+{
+ uprobe_perf_print(tu, func, regs); /* unlike uprobe_perf_func, no uprobe_perf_filter() check is made here */
+}
#endif /* CONFIG_PERF_EVENTS */
static
int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
{
- struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
+ struct trace_uprobe *tu = event->data;
switch (type) {
case TRACE_REG_REGISTER:
- return probe_event_enable(tu, TP_FLAG_TRACE);
+ return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
case TRACE_REG_UNREGISTER:
probe_event_disable(tu, TP_FLAG_TRACE);
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
- return probe_event_enable(tu, TP_FLAG_PROFILE);
+ return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
case TRACE_REG_PERF_UNREGISTER:
probe_event_disable(tu, TP_FLAG_PROFILE);
return 0;
+
+ case TRACE_REG_PERF_OPEN:
+ return uprobe_perf_open(tu, data);
+
+ case TRACE_REG_PERF_CLOSE:
+ return uprobe_perf_close(tu, data);
+
#endif
default:
return 0;
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
{
- struct uprobe_trace_consumer *utc;
struct trace_uprobe *tu;
+ int ret = 0; /* bitwise OR of the enabled handlers' return values */
- utc = container_of(con, struct uprobe_trace_consumer, cons);
- tu = utc->tu;
- if (!tu || tu->consumer != utc)
- return 0;
+ tu = container_of(con, struct trace_uprobe, consumer); /* the consumer is embedded in trace_uprobe */
+ tu->nhit++; /* counted on every call, before the flag checks */
+
+ if (tu->flags & TP_FLAG_TRACE)
+ ret |= uprobe_trace_func(tu, regs);
+
+#ifdef CONFIG_PERF_EVENTS
+ if (tu->flags & TP_FLAG_PROFILE)
+ ret |= uprobe_perf_func(tu, regs); /* may contribute UPROBE_HANDLER_REMOVE */
+#endif
+ return ret;
+}
+
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+ unsigned long func, struct pt_regs *regs)
+{
+ struct trace_uprobe *tu;
+
+ tu = container_of(con, struct trace_uprobe, consumer); /* the consumer is embedded in trace_uprobe */
if (tu->flags & TP_FLAG_TRACE)
- uprobe_trace_func(tu, regs);
+ uretprobe_trace_func(tu, func, regs); /* func is forwarded unchanged */
#ifdef CONFIG_PERF_EVENTS
if (tu->flags & TP_FLAG_PROFILE)
- uprobe_perf_func(tu, regs);
+ uretprobe_perf_func(tu, func, regs); /* func is forwarded unchanged */
#endif
return 0;
}