1 From 152f724b7c81a31c790a44ceb2ac70b95e3bd07e Mon Sep 17 00:00:00 2001
2 From: Yang Shi <yang.shi@linaro.org>
3 Date: Thu, 3 Mar 2016 01:08:57 -0800
4 Subject: [PATCH 326/366] tracing, writeback: Replace cgroup path to cgroup ino
6 commit 5634cc2aa9aebc77bc862992e7805469dcf83dac ("writeback: update writeback
7 tracepoints to report cgroup") made writeback tracepoints print out cgroup
8 path when CGROUP_WRITEBACK is enabled, but this may trigger the bug below on an
9 -rt kernel, since the tracepoints call kernfs_path and kernfs_path_len, which
10 acquire a spin lock that is sleepable on -rt kernels.
12 BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:930
13 in_atomic(): 1, irqs_disabled(): 0, pid: 625, name: kworker/u16:3
14 INFO: lockdep is turned off.
15 Preemption disabled at:[<ffffffc000374a5c>] wb_writeback+0xec/0x830
17 CPU: 7 PID: 625 Comm: kworker/u16:3 Not tainted 4.4.1-rt5 #20
18 Hardware name: Freescale Layerscape 2085a RDB Board (DT)
19 Workqueue: writeback wb_workfn (flush-7:0)
21 [<ffffffc00008d708>] dump_backtrace+0x0/0x200
22 [<ffffffc00008d92c>] show_stack+0x24/0x30
23 [<ffffffc0007b0f40>] dump_stack+0x88/0xa8
24 [<ffffffc000127d74>] ___might_sleep+0x2ec/0x300
25 [<ffffffc000d5d550>] rt_spin_lock+0x38/0xb8
26 [<ffffffc0003e0548>] kernfs_path_len+0x30/0x90
27 [<ffffffc00036b360>] trace_event_raw_event_writeback_work_class+0xe8/0x2e8
28 [<ffffffc000374f90>] wb_writeback+0x620/0x830
29 [<ffffffc000376224>] wb_workfn+0x61c/0x950
30 [<ffffffc000110adc>] process_one_work+0x3ac/0xb30
31 [<ffffffc0001112fc>] worker_thread+0x9c/0x7a8
32 [<ffffffc00011a9e8>] kthread+0x190/0x1b0
33 [<ffffffc000086ca0>] ret_from_fork+0x10/0x30
35 With unlocked kernfs_* functions, synchronize_sched() would have to be called in
36 kernfs_rename, which can be called in a syscall path, and that is problematic.
37 So, print out the cgroup ino instead of the path name; userland can convert it
38 back to a path name.
40 Without CGROUP_WRITEBACK enabled, it just prints out the root dir. But the root
41 dir ino varies between filesystems, so print out -1U to indicate
42 an invalid cgroup ino.
44 Link: http://lkml.kernel.org/r/1456996137-8354-1-git-send-email-yang.shi@linaro.org
46 Acked-by: Tejun Heo <tj@kernel.org>
47 Signed-off-by: Yang Shi <yang.shi@linaro.org>
48 Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
50 include/trace/events/writeback.h | 121 +++++++++++++++------------------------
51 1 file changed, 45 insertions(+), 76 deletions(-)
53 diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
54 index fff846b..73614ce 100644
55 --- a/include/trace/events/writeback.h
56 +++ b/include/trace/events/writeback.h
57 @@ -134,58 +134,28 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,
58 #ifdef CREATE_TRACE_POINTS
59 #ifdef CONFIG_CGROUP_WRITEBACK
61 -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
62 +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
64 - return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
65 + return wb->memcg_css->cgroup->kn->ino;
68 -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
70 - struct cgroup *cgrp = wb->memcg_css->cgroup;
73 - path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
74 - WARN_ON_ONCE(path != buf);
77 -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
80 - return __trace_wb_cgroup_size(wbc->wb);
85 -static inline void __trace_wbc_assign_cgroup(char *buf,
86 - struct writeback_control *wbc)
87 +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
90 - __trace_wb_assign_cgroup(buf, wbc->wb);
91 + return __trace_wb_assign_cgroup(wbc->wb);
97 #else /* CONFIG_CGROUP_WRITEBACK */
99 -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
104 -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
109 -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
110 +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
116 -static inline void __trace_wbc_assign_cgroup(char *buf,
117 - struct writeback_control *wbc)
118 +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
124 #endif /* CONFIG_CGROUP_WRITEBACK */
125 @@ -201,7 +171,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
126 __array(char, name, 32)
127 __field(unsigned long, ino)
128 __field(int, sync_mode)
129 - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
130 + __field(unsigned int, cgroup_ino)
134 @@ -209,14 +179,14 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
135 dev_name(inode_to_bdi(inode)->dev), 32);
136 __entry->ino = inode->i_ino;
137 __entry->sync_mode = wbc->sync_mode;
138 - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
139 + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
142 - TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
143 + TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%u",
148 + __entry->cgroup_ino
152 @@ -246,7 +216,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
153 __field(int, range_cyclic)
154 __field(int, for_background)
156 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
157 + __field(unsigned int, cgroup_ino)
160 strncpy(__entry->name,
161 @@ -258,10 +228,10 @@ DECLARE_EVENT_CLASS(writeback_work_class,
162 __entry->range_cyclic = work->range_cyclic;
163 __entry->for_background = work->for_background;
164 __entry->reason = work->reason;
165 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
166 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
168 TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
169 - "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
170 + "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%u",
172 MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
174 @@ -270,7 +240,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
175 __entry->range_cyclic,
176 __entry->for_background,
177 __print_symbolic(__entry->reason, WB_WORK_REASON),
179 + __entry->cgroup_ino
182 #define DEFINE_WRITEBACK_WORK_EVENT(name) \
183 @@ -300,15 +270,15 @@ DECLARE_EVENT_CLASS(writeback_class,
186 __array(char, name, 32)
187 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
188 + __field(unsigned int, cgroup_ino)
191 strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
192 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
193 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
195 - TP_printk("bdi %s: cgroup=%s",
196 + TP_printk("bdi %s: cgroup_ino=%u",
199 + __entry->cgroup_ino
202 #define DEFINE_WRITEBACK_EVENT(name) \
203 @@ -347,7 +317,7 @@ DECLARE_EVENT_CLASS(wbc_class,
204 __field(int, range_cyclic)
205 __field(long, range_start)
206 __field(long, range_end)
207 - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
208 + __field(unsigned int, cgroup_ino)
212 @@ -361,12 +331,12 @@ DECLARE_EVENT_CLASS(wbc_class,
213 __entry->range_cyclic = wbc->range_cyclic;
214 __entry->range_start = (long)wbc->range_start;
215 __entry->range_end = (long)wbc->range_end;
216 - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
217 + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
220 TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
221 "bgrd=%d reclm=%d cyclic=%d "
222 - "start=0x%lx end=0x%lx cgroup=%s",
223 + "start=0x%lx end=0x%lx cgroup_ino=%u",
225 __entry->nr_to_write,
226 __entry->pages_skipped,
227 @@ -377,7 +347,7 @@ DECLARE_EVENT_CLASS(wbc_class,
228 __entry->range_cyclic,
229 __entry->range_start,
232 + __entry->cgroup_ino
236 @@ -398,7 +368,7 @@ TRACE_EVENT(writeback_queue_io,
240 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
241 + __field(unsigned int, cgroup_ino)
244 unsigned long *older_than_this = work->older_than_this;
245 @@ -408,15 +378,15 @@ TRACE_EVENT(writeback_queue_io,
246 (jiffies - *older_than_this) * 1000 / HZ : -1;
247 __entry->moved = moved;
248 __entry->reason = work->reason;
249 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
250 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
252 - TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
253 + TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%u",
255 __entry->older, /* older_than_this in jiffies */
256 __entry->age, /* older_than_this in relative milliseconds */
258 __print_symbolic(__entry->reason, WB_WORK_REASON),
260 + __entry->cgroup_ino
264 @@ -484,7 +454,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
265 __field(unsigned long, dirty_ratelimit)
266 __field(unsigned long, task_ratelimit)
267 __field(unsigned long, balanced_dirty_ratelimit)
268 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
269 + __field(unsigned int, cgroup_ino)
273 @@ -496,13 +466,13 @@ TRACE_EVENT(bdi_dirty_ratelimit,
274 __entry->task_ratelimit = KBps(task_ratelimit);
275 __entry->balanced_dirty_ratelimit =
276 KBps(wb->balanced_dirty_ratelimit);
277 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
278 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
282 "write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
283 "dirty_ratelimit=%lu task_ratelimit=%lu "
284 - "balanced_dirty_ratelimit=%lu cgroup=%s",
285 + "balanced_dirty_ratelimit=%lu cgroup_ino=%u",
287 __entry->write_bw, /* write bandwidth */
288 __entry->avg_write_bw, /* avg write bandwidth */
289 @@ -510,7 +480,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
290 __entry->dirty_ratelimit, /* base ratelimit */
291 __entry->task_ratelimit, /* ratelimit with position control */
292 __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
294 + __entry->cgroup_ino
298 @@ -548,7 +518,7 @@ TRACE_EVENT(balance_dirty_pages,
299 __field( long, pause)
300 __field(unsigned long, period)
301 __field( long, think)
302 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
303 + __field(unsigned int, cgroup_ino)
307 @@ -571,7 +541,7 @@ TRACE_EVENT(balance_dirty_pages,
308 __entry->period = period * 1000 / HZ;
309 __entry->pause = pause * 1000 / HZ;
310 __entry->paused = (jiffies - start_time) * 1000 / HZ;
311 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
312 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
316 @@ -580,7 +550,7 @@ TRACE_EVENT(balance_dirty_pages,
317 "bdi_setpoint=%lu bdi_dirty=%lu "
318 "dirty_ratelimit=%lu task_ratelimit=%lu "
319 "dirtied=%u dirtied_pause=%u "
320 - "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
321 + "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%u",
325 @@ -595,7 +565,7 @@ TRACE_EVENT(balance_dirty_pages,
326 __entry->pause, /* ms */
327 __entry->period, /* ms */
328 __entry->think, /* ms */
330 + __entry->cgroup_ino
334 @@ -609,8 +579,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
335 __field(unsigned long, ino)
336 __field(unsigned long, state)
337 __field(unsigned long, dirtied_when)
338 - __dynamic_array(char, cgroup,
339 - __trace_wb_cgroup_size(inode_to_wb(inode)))
340 + __field(unsigned int, cgroup_ino)
344 @@ -619,16 +588,16 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
345 __entry->ino = inode->i_ino;
346 __entry->state = inode->i_state;
347 __entry->dirtied_when = inode->dirtied_when;
348 - __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
349 + __entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode));
352 - TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
353 + TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%u",
356 show_inode_state(__entry->state),
357 __entry->dirtied_when,
358 (jiffies - __entry->dirtied_when) / HZ,
360 + __entry->cgroup_ino
364 @@ -684,7 +653,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
365 __field(unsigned long, writeback_index)
366 __field(long, nr_to_write)
367 __field(unsigned long, wrote)
368 - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
369 + __field(unsigned int, cgroup_ino)
373 @@ -696,11 +665,11 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
374 __entry->writeback_index = inode->i_mapping->writeback_index;
375 __entry->nr_to_write = nr_to_write;
376 __entry->wrote = nr_to_write - wbc->nr_to_write;
377 - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
378 + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
381 TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
382 - "index=%lu to_write=%ld wrote=%lu cgroup=%s",
383 + "index=%lu to_write=%ld wrote=%lu cgroup_ino=%u",
386 show_inode_state(__entry->state),
387 @@ -709,7 +678,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
388 __entry->writeback_index,
389 __entry->nr_to_write,
392 + __entry->cgroup_ino