]> rtime.felk.cvut.cz Git - hercules2020/nv-tegra/linux-4.4.git/blob - rt-patches/0326-tracing-writeback-Replace-cgroup-path-to-cgroup-ino.patch
Fix memguard and related syscalls
[hercules2020/nv-tegra/linux-4.4.git] / rt-patches / 0326-tracing-writeback-Replace-cgroup-path-to-cgroup-ino.patch
1 From 152f724b7c81a31c790a44ceb2ac70b95e3bd07e Mon Sep 17 00:00:00 2001
2 From: Yang Shi <yang.shi@linaro.org>
3 Date: Thu, 3 Mar 2016 01:08:57 -0800
4 Subject: [PATCH 326/366] tracing, writeback: Replace cgroup path to cgroup ino
5
6 commit 5634cc2aa9aebc77bc862992e7805469dcf83dac ("writeback: update writeback
7 tracepoints to report cgroup") made writeback tracepoints print out cgroup
8 path when CGROUP_WRITEBACK is enabled, but it may trigger the below bug on -rt
9 kernels since the tracepoints call kernfs_path and kernfs_path_len, which
10 acquire a spin lock that is sleepable on -rt kernels.
11
12 BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:930
13 in_atomic(): 1, irqs_disabled(): 0, pid: 625, name: kworker/u16:3
14 INFO: lockdep is turned off.
15 Preemption disabled at:[<ffffffc000374a5c>] wb_writeback+0xec/0x830
16
17 CPU: 7 PID: 625 Comm: kworker/u16:3 Not tainted 4.4.1-rt5 #20
18 Hardware name: Freescale Layerscape 2085a RDB Board (DT)
19 Workqueue: writeback wb_workfn (flush-7:0)
20 Call trace:
21 [<ffffffc00008d708>] dump_backtrace+0x0/0x200
22 [<ffffffc00008d92c>] show_stack+0x24/0x30
23 [<ffffffc0007b0f40>] dump_stack+0x88/0xa8
24 [<ffffffc000127d74>] ___might_sleep+0x2ec/0x300
25 [<ffffffc000d5d550>] rt_spin_lock+0x38/0xb8
26 [<ffffffc0003e0548>] kernfs_path_len+0x30/0x90
27 [<ffffffc00036b360>] trace_event_raw_event_writeback_work_class+0xe8/0x2e8
28 [<ffffffc000374f90>] wb_writeback+0x620/0x830
29 [<ffffffc000376224>] wb_workfn+0x61c/0x950
30 [<ffffffc000110adc>] process_one_work+0x3ac/0xb30
31 [<ffffffc0001112fc>] worker_thread+0x9c/0x7a8
32 [<ffffffc00011a9e8>] kthread+0x190/0x1b0
33 [<ffffffc000086ca0>] ret_from_fork+0x10/0x30
34
35 With unlocked kernfs_* functions, synchronize_sched() would have to be called
36 in kernfs_rename, which can be called in the syscall path, and that is
37 problematic. So, print out the cgroup ino instead of the path name; userland
38 can convert it to a path name.
39
40 Without CGROUP_WRITEBACK enabled, it just prints out root dir. But the root
41 dir ino varies between filesystems, so print out -1U to indicate
42 an invalid cgroup ino.
43
44 Link: http://lkml.kernel.org/r/1456996137-8354-1-git-send-email-yang.shi@linaro.org
45
46 Acked-by: Tejun Heo <tj@kernel.org>
47 Signed-off-by: Yang Shi <yang.shi@linaro.org>
48 Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
49 ---
50  include/trace/events/writeback.h | 121 +++++++++++++++------------------------
51  1 file changed, 45 insertions(+), 76 deletions(-)
52
53 diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
54 index fff846b..73614ce 100644
55 --- a/include/trace/events/writeback.h
56 +++ b/include/trace/events/writeback.h
57 @@ -134,58 +134,28 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,
58  #ifdef CREATE_TRACE_POINTS
59  #ifdef CONFIG_CGROUP_WRITEBACK
60  
61 -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
62 +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
63  {
64 -       return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
65 +       return wb->memcg_css->cgroup->kn->ino;
66  }
67  
68 -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
69 -{
70 -       struct cgroup *cgrp = wb->memcg_css->cgroup;
71 -       char *path;
72 -
73 -       path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
74 -       WARN_ON_ONCE(path != buf);
75 -}
76 -
77 -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
78 -{
79 -       if (wbc->wb)
80 -               return __trace_wb_cgroup_size(wbc->wb);
81 -       else
82 -               return 2;
83 -}
84 -
85 -static inline void __trace_wbc_assign_cgroup(char *buf,
86 -                                            struct writeback_control *wbc)
87 +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
88  {
89         if (wbc->wb)
90 -               __trace_wb_assign_cgroup(buf, wbc->wb);
91 +               return __trace_wb_assign_cgroup(wbc->wb);
92         else
93 -               strcpy(buf, "/");
94 +               return -1U;
95  }
96 -
97  #else  /* CONFIG_CGROUP_WRITEBACK */
98  
99 -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
100 -{
101 -       return 2;
102 -}
103 -
104 -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
105 -{
106 -       strcpy(buf, "/");
107 -}
108 -
109 -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
110 +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
111  {
112 -       return 2;
113 +       return -1U;
114  }
115  
116 -static inline void __trace_wbc_assign_cgroup(char *buf,
117 -                                            struct writeback_control *wbc)
118 +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
119  {
120 -       strcpy(buf, "/");
121 +       return -1U;
122  }
123  
124  #endif /* CONFIG_CGROUP_WRITEBACK */
125 @@ -201,7 +171,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
126                 __array(char, name, 32)
127                 __field(unsigned long, ino)
128                 __field(int, sync_mode)
129 -               __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
130 +               __field(unsigned int, cgroup_ino)
131         ),
132  
133         TP_fast_assign(
134 @@ -209,14 +179,14 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
135                         dev_name(inode_to_bdi(inode)->dev), 32);
136                 __entry->ino            = inode->i_ino;
137                 __entry->sync_mode      = wbc->sync_mode;
138 -               __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
139 +               __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
140         ),
141  
142 -       TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
143 +       TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%u",
144                 __entry->name,
145                 __entry->ino,
146                 __entry->sync_mode,
147 -               __get_str(cgroup)
148 +               __entry->cgroup_ino
149         )
150  );
151  
152 @@ -246,7 +216,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
153                 __field(int, range_cyclic)
154                 __field(int, for_background)
155                 __field(int, reason)
156 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
157 +               __field(unsigned int, cgroup_ino)
158         ),
159         TP_fast_assign(
160                 strncpy(__entry->name,
161 @@ -258,10 +228,10 @@ DECLARE_EVENT_CLASS(writeback_work_class,
162                 __entry->range_cyclic = work->range_cyclic;
163                 __entry->for_background = work->for_background;
164                 __entry->reason = work->reason;
165 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
166 +               __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
167         ),
168         TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
169 -                 "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
170 +                 "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%u",
171                   __entry->name,
172                   MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
173                   __entry->nr_pages,
174 @@ -270,7 +240,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
175                   __entry->range_cyclic,
176                   __entry->for_background,
177                   __print_symbolic(__entry->reason, WB_WORK_REASON),
178 -                 __get_str(cgroup)
179 +                 __entry->cgroup_ino
180         )
181  );
182  #define DEFINE_WRITEBACK_WORK_EVENT(name) \
183 @@ -300,15 +270,15 @@ DECLARE_EVENT_CLASS(writeback_class,
184         TP_ARGS(wb),
185         TP_STRUCT__entry(
186                 __array(char, name, 32)
187 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
188 +               __field(unsigned int, cgroup_ino)
189         ),
190         TP_fast_assign(
191                 strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
192 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
193 +               __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
194         ),
195 -       TP_printk("bdi %s: cgroup=%s",
196 +       TP_printk("bdi %s: cgroup_ino=%u",
197                   __entry->name,
198 -                 __get_str(cgroup)
199 +                 __entry->cgroup_ino
200         )
201  );
202  #define DEFINE_WRITEBACK_EVENT(name) \
203 @@ -347,7 +317,7 @@ DECLARE_EVENT_CLASS(wbc_class,
204                 __field(int, range_cyclic)
205                 __field(long, range_start)
206                 __field(long, range_end)
207 -               __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
208 +               __field(unsigned int, cgroup_ino)
209         ),
210  
211         TP_fast_assign(
212 @@ -361,12 +331,12 @@ DECLARE_EVENT_CLASS(wbc_class,
213                 __entry->range_cyclic   = wbc->range_cyclic;
214                 __entry->range_start    = (long)wbc->range_start;
215                 __entry->range_end      = (long)wbc->range_end;
216 -               __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
217 +               __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
218         ),
219  
220         TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
221                 "bgrd=%d reclm=%d cyclic=%d "
222 -               "start=0x%lx end=0x%lx cgroup=%s",
223 +               "start=0x%lx end=0x%lx cgroup_ino=%u",
224                 __entry->name,
225                 __entry->nr_to_write,
226                 __entry->pages_skipped,
227 @@ -377,7 +347,7 @@ DECLARE_EVENT_CLASS(wbc_class,
228                 __entry->range_cyclic,
229                 __entry->range_start,
230                 __entry->range_end,
231 -               __get_str(cgroup)
232 +               __entry->cgroup_ino
233         )
234  )
235  
236 @@ -398,7 +368,7 @@ TRACE_EVENT(writeback_queue_io,
237                 __field(long,           age)
238                 __field(int,            moved)
239                 __field(int,            reason)
240 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
241 +               __field(unsigned int,   cgroup_ino)
242         ),
243         TP_fast_assign(
244                 unsigned long *older_than_this = work->older_than_this;
245 @@ -408,15 +378,15 @@ TRACE_EVENT(writeback_queue_io,
246                                   (jiffies - *older_than_this) * 1000 / HZ : -1;
247                 __entry->moved  = moved;
248                 __entry->reason = work->reason;
249 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
250 +               __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
251         ),
252 -       TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
253 +       TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%u",
254                 __entry->name,
255                 __entry->older, /* older_than_this in jiffies */
256                 __entry->age,   /* older_than_this in relative milliseconds */
257                 __entry->moved,
258                 __print_symbolic(__entry->reason, WB_WORK_REASON),
259 -               __get_str(cgroup)
260 +               __entry->cgroup_ino
261         )
262  );
263  
264 @@ -484,7 +454,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
265                 __field(unsigned long,  dirty_ratelimit)
266                 __field(unsigned long,  task_ratelimit)
267                 __field(unsigned long,  balanced_dirty_ratelimit)
268 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
269 +               __field(unsigned int,   cgroup_ino)
270         ),
271  
272         TP_fast_assign(
273 @@ -496,13 +466,13 @@ TRACE_EVENT(bdi_dirty_ratelimit,
274                 __entry->task_ratelimit = KBps(task_ratelimit);
275                 __entry->balanced_dirty_ratelimit =
276                                         KBps(wb->balanced_dirty_ratelimit);
277 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
278 +               __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
279         ),
280  
281         TP_printk("bdi %s: "
282                   "write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
283                   "dirty_ratelimit=%lu task_ratelimit=%lu "
284 -                 "balanced_dirty_ratelimit=%lu cgroup=%s",
285 +                 "balanced_dirty_ratelimit=%lu cgroup_ino=%u",
286                   __entry->bdi,
287                   __entry->write_bw,            /* write bandwidth */
288                   __entry->avg_write_bw,        /* avg write bandwidth */
289 @@ -510,7 +480,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
290                   __entry->dirty_ratelimit,     /* base ratelimit */
291                   __entry->task_ratelimit, /* ratelimit with position control */
292                   __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
293 -                 __get_str(cgroup)
294 +                 __entry->cgroup_ino
295         )
296  );
297  
298 @@ -548,7 +518,7 @@ TRACE_EVENT(balance_dirty_pages,
299                 __field(         long,  pause)
300                 __field(unsigned long,  period)
301                 __field(         long,  think)
302 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
303 +               __field(unsigned int,   cgroup_ino)
304         ),
305  
306         TP_fast_assign(
307 @@ -571,7 +541,7 @@ TRACE_EVENT(balance_dirty_pages,
308                 __entry->period         = period * 1000 / HZ;
309                 __entry->pause          = pause * 1000 / HZ;
310                 __entry->paused         = (jiffies - start_time) * 1000 / HZ;
311 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
312 +               __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
313         ),
314  
315  
316 @@ -580,7 +550,7 @@ TRACE_EVENT(balance_dirty_pages,
317                   "bdi_setpoint=%lu bdi_dirty=%lu "
318                   "dirty_ratelimit=%lu task_ratelimit=%lu "
319                   "dirtied=%u dirtied_pause=%u "
320 -                 "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
321 +                 "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%u",
322                   __entry->bdi,
323                   __entry->limit,
324                   __entry->setpoint,
325 @@ -595,7 +565,7 @@ TRACE_EVENT(balance_dirty_pages,
326                   __entry->pause,       /* ms */
327                   __entry->period,      /* ms */
328                   __entry->think,       /* ms */
329 -                 __get_str(cgroup)
330 +                 __entry->cgroup_ino
331           )
332  );
333  
334 @@ -609,8 +579,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
335                 __field(unsigned long, ino)
336                 __field(unsigned long, state)
337                 __field(unsigned long, dirtied_when)
338 -               __dynamic_array(char, cgroup,
339 -                               __trace_wb_cgroup_size(inode_to_wb(inode)))
340 +               __field(unsigned int, cgroup_ino)
341         ),
342  
343         TP_fast_assign(
344 @@ -619,16 +588,16 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
345                 __entry->ino            = inode->i_ino;
346                 __entry->state          = inode->i_state;
347                 __entry->dirtied_when   = inode->dirtied_when;
348 -               __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
349 +               __entry->cgroup_ino     = __trace_wb_assign_cgroup(inode_to_wb(inode));
350         ),
351  
352 -       TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
353 +       TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%u",
354                   __entry->name,
355                   __entry->ino,
356                   show_inode_state(__entry->state),
357                   __entry->dirtied_when,
358                   (jiffies - __entry->dirtied_when) / HZ,
359 -                 __get_str(cgroup)
360 +                 __entry->cgroup_ino
361         )
362  );
363  
364 @@ -684,7 +653,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
365                 __field(unsigned long, writeback_index)
366                 __field(long, nr_to_write)
367                 __field(unsigned long, wrote)
368 -               __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
369 +               __field(unsigned int, cgroup_ino)
370         ),
371  
372         TP_fast_assign(
373 @@ -696,11 +665,11 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
374                 __entry->writeback_index = inode->i_mapping->writeback_index;
375                 __entry->nr_to_write    = nr_to_write;
376                 __entry->wrote          = nr_to_write - wbc->nr_to_write;
377 -               __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
378 +               __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
379         ),
380  
381         TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
382 -                 "index=%lu to_write=%ld wrote=%lu cgroup=%s",
383 +                 "index=%lu to_write=%ld wrote=%lu cgroup_ino=%u",
384                   __entry->name,
385                   __entry->ino,
386                   show_inode_state(__entry->state),
387 @@ -709,7 +678,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
388                   __entry->writeback_index,
389                   __entry->nr_to_write,
390                   __entry->wrote,
391 -                 __get_str(cgroup)
392 +                 __entry->cgroup_ino
393         )
394  );
395  
396 -- 
397 1.9.1
398