From d954fbf0ff6b5fdfb32350e85a2f15d3db976506 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2009 23:46:52 +0100 Subject: [PATCH 01/20] tracing: Fix wrong usage of strstrip in trace_ksyms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit strstrip returns a pointer to the first non-space character, but the code in parse_ksym_trace_str() ignores that. strstrip is now must_check and therefore we get the correct warning: kernel/trace/trace_ksym.c:294: warning: ignoring return value of ‘strstrip’, declared with attribute warn_unused_result We are really not interested in leading whitespace here. Fix that and clean up the dozen kfree() exit paths. Signed-off-by: Thomas Gleixner Cc: Xiao Guangrong Cc: Steven Rostedt --- kernel/trace/trace_ksym.c | 49 +++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index ddfa0fd43bc..64e7a5bd669 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -277,21 +277,20 @@ static ssize_t ksym_trace_filter_write(struct file *file, { struct trace_ksym *entry; struct hlist_node *node; - char *input_string, *ksymname = NULL; + char *buf, *input_string, *ksymname = NULL; unsigned long ksym_addr = 0; int ret, op, changed = 0; - input_string = kzalloc(count + 1, GFP_KERNEL); - if (!input_string) + buf = kzalloc(count + 1, GFP_KERNEL); + if (!buf) return -ENOMEM; - if (copy_from_user(input_string, buffer, count)) { - kfree(input_string); - return -EFAULT; - } - input_string[count] = '\0'; + ret = -EFAULT; + if (copy_from_user(buf, buffer, count)) + goto out; - strstrip(input_string); + buf[count] = '\0'; + input_string = strstrip(buf); /* * Clear all breakpoints if: @@ -302,15 +301,13 @@ static ssize_t ksym_trace_filter_write(struct file *file, if (!input_string[0] || !strcmp(input_string, "0") || !strcmp(input_string, "*:---")) { __ksym_trace_reset(); - kfree(input_string); - return count; + ret = 0; + goto out; } ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); - if (ret < 0) { - kfree(input_string); - return ret; - } + if (ret < 0) + goto out; mutex_lock(&ksym_tracer_mutex); @@ -321,7 +318,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, if (entry->attr.bp_type != op) changed = 1; else - goto out; + goto out_unlock; break; } } @@ -336,28 +333,24 @@ static ssize_t ksym_trace_filter_write(struct file *file, if (IS_ERR(entry->ksym_hbp)) ret = PTR_ERR(entry->ksym_hbp); else - goto out; + goto out_unlock; } /* Error or "symbol:---" case: drop it */ ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); kfree(entry); - goto out; + goto out_unlock; } else { /* Check for malformed request: (4) */ - if (op == 0) - goto out; - ret = process_new_ksym_entry(ksymname, op, ksym_addr); + if (op) + ret = process_new_ksym_entry(ksymname, op, ksym_addr); } -out: +out_unlock: mutex_unlock(&ksym_tracer_mutex); - - kfree(input_string); - - if (!ret) - ret = count; - return ret; +out: + kfree(buf); + return !ret ? count : ret; } static const struct file_operations ksym_tracing_fops = { From 184210154b9aa570099183f6c062ac4eb11190b7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Dec 2009 22:54:27 -0500 Subject: [PATCH 02/20] ring-buffer: Use sync sched protection on ring buffer resizing There was a comment in the ring buffer code that says the calling layers should prevent tracing or reading of the ring buffer while resizing.
I have discovered that the tracers do not honor this arrangement. This patch moves the disabling and synchronizing of the ring buffer to a higher layer during resizing. This guarantees that no writes are occurring while the resize takes place. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1ca4956ab5..0d64c51ab4d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1193,9 +1193,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) struct list_head *p; unsigned i; - atomic_inc(&cpu_buffer->record_disabled); - synchronize_sched(); - spin_lock_irq(&cpu_buffer->reader_lock); rb_head_page_deactivate(cpu_buffer); @@ -1214,9 +1211,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) spin_unlock_irq(&cpu_buffer->reader_lock); rb_check_pages(cpu_buffer); - - atomic_dec(&cpu_buffer->record_disabled); - } static void @@ -1227,9 +1221,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, struct list_head *p; unsigned i; - atomic_inc(&cpu_buffer->record_disabled); - synchronize_sched(); - spin_lock_irq(&cpu_buffer->reader_lock); rb_head_page_deactivate(cpu_buffer); @@ -1245,8 +1236,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, spin_unlock_irq(&cpu_buffer->reader_lock); rb_check_pages(cpu_buffer); - - atomic_dec(&cpu_buffer->record_disabled); } /** @@ -1254,11 +1243,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, * @buffer: the buffer to resize. * @size: the new size. * - * The tracer is responsible for making sure that the buffer is - * not being used while changing the size. - * Note: We may be able to change the above requirement by using - * RCU synchronizations. - * * Minimum size is 2 * BUF_PAGE_SIZE. * * Returns -1 on failure. @@ -1290,6 +1274,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) if (size == buffer_size) return size; + atomic_inc(&buffer->record_disabled); + + /* Make sure all writers are done with this buffer. */ + synchronize_sched(); + mutex_lock(&buffer->mutex); get_online_cpus(); @@ -1352,6 +1341,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) put_online_cpus(); mutex_unlock(&buffer->mutex); + atomic_dec(&buffer->record_disabled); + return size; free_pages: @@ -1361,6 +1352,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) } put_online_cpus(); mutex_unlock(&buffer->mutex); + atomic_dec(&buffer->record_disabled); return -ENOMEM; /* @@ -1370,6 +1362,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) out_fail: put_online_cpus(); mutex_unlock(&buffer->mutex); + atomic_dec(&buffer->record_disabled); return -1; } EXPORT_SYMBOL_GPL(ring_buffer_resize); From dd7f59435782a02ceb6d16b9ce823dd3345d75ec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Dec 2009 23:20:52 -0500 Subject: [PATCH 03/20] ring-buffer: Move resize integrity check under reader lock While using an application that does splice on the ftrace ring buffer at startup, I triggered an integrity check failure. Looking into this, I discovered that resizing the buffer performs an integrity check after the buffer is resized. This check unfortunately is performed after it releases the reader lock. If a reader is reading the buffer it may cause the integrity check to trigger a false failure.
This patch simply moves the integrity checker under the protection of the ring buffer reader lock. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0d64c51ab4d..eccb4cf1e99 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1208,9 +1208,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) return; rb_reset_cpu(cpu_buffer); - spin_unlock_irq(&cpu_buffer->reader_lock); - rb_check_pages(cpu_buffer); + + spin_unlock_irq(&cpu_buffer->reader_lock); } static void @@ -1233,9 +1233,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, list_add_tail(&bpage->list, cpu_buffer->pages); } rb_reset_cpu(cpu_buffer); - spin_unlock_irq(&cpu_buffer->reader_lock); - rb_check_pages(cpu_buffer); + + spin_unlock_irq(&cpu_buffer->reader_lock); } /** From 03889384cee7a198a79447c1ea6aca2c8e54d155 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Dec 2009 09:48:22 -0500 Subject: [PATCH 04/20] tracing: Add trace_dump_stack() I've been asked a few times about how to find out what is calling some location in the kernel. One way is to use dynamic function tracing and implement the func_stack_trace. But this only finds out who is calling a particular function. It does not tell you who is calling that function and entering a specific if conditional. I have myself implemented a quick version of trace_dump_stack() for this purpose a few times, and just needed it now. This is when I realized that this would be a good tool to have in the kernel like trace_printk(). Using trace_dump_stack() is similar to dump_stack() except that it writes to the trace buffer instead and can be used in critical locations. For example: @@ -5485,8 +5485,12 @@ need_resched_nonpreemptible: if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely(signal_pending_state(prev->state, prev))) prev->state = TASK_RUNNING; - else + else { deactivate_task(rq, prev, 1); + trace_printk("Deactivating task %s:%d\n", + prev->comm, prev->pid); + trace_dump_stack(); + } switch_count = &prev->nvcsw; } Produces: <...>-3249 [001] 296.105269: schedule: Deactivating task ntpd:3249 <...>-3249 [001] 296.105270: => schedule => schedule_hrtimeout_range => poll_schedule_timeout => do_select => core_sys_select => sys_select => system_call_fastpath Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 2 ++ kernel/trace/trace.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3fa4c590cf1..5ad4199fb07 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -492,6 +492,8 @@ extern int __trace_printk(unsigned long ip, const char *fmt, ...) 
__attribute__ ((format (printf, 2, 3))); +extern void trace_dump_stack(void); + /* * The double __builtin_constant_p is because gcc will give us an error * if we try to allocate the static variable to fmt if it is not a diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 88bd9ae2a9e..f531301b7a3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1151,6 +1151,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, __ftrace_trace_stack(tr->buffer, flags, skip, pc); } +/** + * trace_dump_stack - record a stack back trace in the trace buffer + */ +void trace_dump_stack(void) +{ + unsigned long flags; + + if (tracing_disabled || tracing_selftest_running) + return 0; + + local_save_flags(flags); + + /* skipping 3 traces, seems to get us at the caller of this function */ + __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); +} + void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { From cc51a0fca66658ea710db566ba17e80e3f7d4957 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Dec 2009 11:54:51 -0500 Subject: [PATCH 05/20] tracing: Add stack trace to irqsoff tracer The irqsoff and friends tracers help in finding causes of latency in the kernel. They also work with the function tracer to show what was happening when interrupts or preemption are disabled. But the function tracer has a bit of an overhead and can cause exaggerated readings. Currently, when tracing with /proc/sys/kernel/ftrace_enabled = 0, where the function tracer is disabled, the information that is provided can end up being useless. For example, a 2 and a half millisecond latency only showed: # tracer: preemptirqsoff # # preemptirqsoff latency trace v1.1.5 on 2.6.32 # -------------------------------------------------------------------- # latency: 2463 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) # ----------------- # | task: -4242 (uid:0 nice:0 policy:0 rt_prio:0) # ----------------- # => started at: _spin_lock_irqsave # => ended at: remove_wait_queue # # # _------=> CPU# # / _-----=> irqs-off # | / _----=> need-resched # || / _---=> hardirq/softirq # ||| / _--=> preempt-depth # |||| /_--=> lock-depth # |||||/ delay # cmd pid |||||| time | caller # \ / |||||| \ | / hackbenc-4242 2d.... 0us!: trace_hardirqs_off <-_spin_lock_irqsave hackbenc-4242 2...1. 2463us+: _spin_unlock_irqrestore <-remove_wait_queue hackbenc-4242 2...1. 2466us : trace_preempt_on <-remove_wait_queue The above lets us know that hackbench with pid 4242 grabbed a spin lock somewhere and enabled preemption at remove_wait_queue. This helps a little, but it does not show where this actually happened. This patch adds the stack dump to the end of the irqsoff tracer. This provides the following output: hackbenc-4242 2d.... 0us!: trace_hardirqs_off <-_spin_lock_irqsave hackbenc-4242 2...1. 2463us+: _spin_unlock_irqrestore <-remove_wait_queue hackbenc-4242 2...1. 2466us : trace_preempt_on <-remove_wait_queue hackbenc-4242 2...1. 2467us : => sub_preempt_count => _spin_unlock_irqrestore => remove_wait_queue => free_poll_entry => poll_freewait => do_sys_poll => sys_poll => system_call_fastpath Now we see that the culprit of this latency was the free_poll_entry code.
Signed-off-by: Steven Rostedt --- kernel/trace/trace_irqsoff.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 3aa7eaa2114..2974bc7538c 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr, goto out_unlock; trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + /* Skip 5 functions to get to the irq/preempt enable function */ + __trace_stack(tr, flags, 5, pc); if (data->critical_sequence != max_sequence) goto out_unlock; From 4819568f23a8bef0ca99b740ca60fe2450ab0aac Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 12 Dec 2009 13:06:13 -0800 Subject: [PATCH 06/20] ftrace.h: Use common pr_info fmt string Reduces fmt string space a bit. Signed-off-by: Joe Perches Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1260651974.2637.4.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index d1b3de9c1a7..c4eca380204 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -733,7 +733,7 @@ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ ret = register_trace_##call(ftrace_raw_event_##call); \ if (ret) \ pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ + "probe to %s\n", #call); \ return ret; \ } \ \ From 87d9b4e1c52867a45331a9a5495f6448e0c68b23 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:14:20 +0800 Subject: [PATCH 07/20] tracing: Extract duplicate ftrace_raw_init_event_foo() Use a generic trace_event_raw_init() function for all events' raw_init callbacks (except kprobes) instead of defining the same version for each of these. This shrinks the kernel code: text data bss dec hex filename 5355293 1961928 7103260 14420481 dc0a01 vmlinux.o.old 5346802 1961864 7103260 14411926 dbe896 vmlinux.o raw_init can't be removed, because ftrace events and kprobe events use different raw_init callbacks. Though it's possible to totally remove raw_init, I choose to leave it as it is for now.
Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Jason Baron Cc: Ingo Molnar LKML-Reference: <4B1DC48C.7080603@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 1 + include/linux/syscalls.h | 4 ++-- include/trace/ftrace.h | 35 ++++------------------------------- kernel/trace/trace_events.c | 14 ++++++++++++++ 4 files changed, 21 insertions(+), 33 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 38f8d655383..ea44b891109 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -158,6 +158,7 @@ enum { FILTER_PTR_STRING, }; +extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_common_fields(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index bc70c5810fe..94ac28437be 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -145,7 +145,7 @@ struct perf_event_attr; .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = init_syscall_trace, \ + .raw_init = trace_event_raw_init, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -167,7 +167,7 @@ struct perf_event_attr; .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = init_syscall_trace, \ + .raw_init = trace_event_raw_init, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c4eca380204..6055b0604c8 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -623,23 +623,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * .trace = ftrace_raw_output_, <-- stage 2 * }; * - * static int ftrace_raw_init_event_(struct ftrace_event_call *unused) - * { - * int id; - * - * id = register_ftrace_event(&ftrace_event_type_); - * if (!id) - * return -ENODEV; - * event_.id = id; - * return 0; - * } - * * static struct ftrace_event_call __used * __attribute__((__aligned__(4))) * __attribute__((section("_ftrace_events"))) event_ = { * .name = "", * .system = "", - * .raw_init = ftrace_raw_init_event_, + * .raw_init = trace_event_raw_init, * .regfunc = ftrace_reg_event_, * .unregfunc = ftrace_unreg_event_, * .show_format = ftrace_format_, @@ -647,9 +636,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * */ -#undef TP_FMT -#define TP_FMT(fmt, args...) 
fmt "\n", ##args - #ifdef CONFIG_EVENT_PROFILE #define _TRACE_PROFILE_INIT(call) \ @@ -744,19 +730,7 @@ static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ \ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ -}; \ - \ -static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ -{ \ - int id; \ - \ - id = register_ftrace_event(&ftrace_event_type_##call); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - INIT_LIST_HEAD(&event_##call.fields); \ - return 0; \ -} +}; #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ @@ -776,7 +750,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .event = &ftrace_event_type_##call, \ - .raw_init = ftrace_raw_init_event_##call, \ + .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##template, \ @@ -793,7 +767,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .event = &ftrace_event_type_##call, \ - .raw_init = ftrace_raw_init_event_##call, \ + .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ @@ -953,7 +927,6 @@ end: \ perf_swevent_put_recursion_context(rctx); \ end_recursion: \ local_irq_restore(irq_flags); \ - \ } #undef DEFINE_EVENT diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1d18315dc83..8ed66e0d476 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -105,6 +105,20 @@ void trace_destroy_fields(struct ftrace_event_call *call) } } +int trace_event_raw_init(struct ftrace_event_call *call) +{ + int id; + + id = register_ftrace_event(call->event); + if (!id) + return -ENODEV; + call->id = id; + INIT_LIST_HEAD(&call->fields); + + return 0; +} +EXPORT_SYMBOL_GPL(trace_event_raw_init); + static void ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { From 614a71a26ba3d97e9fa85649db69a682b78e407d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:14:36 +0800 Subject: [PATCH 08/20] tracing: Pull up calls to trace_define_common_fields() Call trace_define_common_fields() in event_create_dir() only. 
This avoids trace events to handle it from their define_fields callbacks and shrinks the kernel code size: text data bss dec hex filename 5346802 1961864 7103260 14411926 dbe896 vmlinux.o.old 5345151 1961864 7103260 14410275 dbe223 vmlinux.o Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Ingo Molnar Cc: Jason Baron Cc: Masami Hiramatsu LKML-Reference: <4B1DC49C.8000107@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 1 - include/trace/ftrace.h | 4 ---- kernel/trace/trace_events.c | 7 ++++--- kernel/trace/trace_export.c | 4 ---- kernel/trace/trace_kprobe.c | 8 -------- kernel/trace/trace_syscalls.c | 8 -------- 6 files changed, 4 insertions(+), 28 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ea44b891109..db97c64ce0e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -159,7 +159,6 @@ enum { }; extern int trace_event_raw_init(struct ftrace_event_call *call); -extern int trace_define_common_fields(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 6055b0604c8..2af2f7a2c1b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -436,10 +436,6 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ struct ftrace_raw_##call field; \ int ret; \ \ - ret = trace_define_common_fields(event_call); \ - if (ret) \ - return ret; \ - \ tstruct; \ \ return ret; \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8ed66e0d476..97b0b3aa166 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field); if (ret) \ return ret; -int trace_define_common_fields(struct ftrace_event_call *call) +static int trace_define_common_fields(struct ftrace_event_call *call) { int ret; struct trace_entry ent; @@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call) return ret; } -EXPORT_SYMBOL_GPL(trace_define_common_fields); void trace_destroy_fields(struct ftrace_event_call *call) { @@ -927,7 +926,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, id); if (call->define_fields) { - ret = call->define_fields(call); + ret = trace_define_common_fields(call); + if (!ret) + ret = call->define_fields(call); if (ret < 0) { pr_warning("Could not initialize trace point" " events/%s\n", call->name); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index dff8c84ddf1..458e5bfe26d 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -184,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ struct struct_name field; \ int ret; \ \ - ret = trace_define_common_fields(event_call); \ - if (ret) \ - return ret; \ - \ tstruct; \ \ return ret; \ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index aff5f80b59b..e3c80e92589 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1113,10 +1113,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) struct kprobe_trace_entry field; struct trace_probe *tp = (struct trace_probe *)event_call->data; - ret = trace_define_common_fields(event_call); - if (!ret) - return ret; - DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); 
/* Set argument names as fields */ @@ -1131,10 +1127,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) struct kretprobe_trace_entry field; struct trace_probe *tp = (struct trace_probe *)event_call->data; - ret = trace_define_common_fields(event_call); - if (!ret) - return ret; - DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 57501d90096..b957edd0ca3 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) int i; int offset = offsetof(typeof(trace), args); - ret = trace_define_common_fields(call); - if (ret) - return ret; - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); if (ret) return ret; @@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) struct syscall_trace_exit trace; int ret; - ret = trace_define_common_fields(call); - if (ret) - return ret; - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); if (ret) return ret; From 3b8e4273814a7f9e9a74ece517d9206fea919aaa Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:14:52 +0800 Subject: [PATCH 09/20] tracing: Move a printk out of ftrace_raw_reg_event_foo() Move the printk from each ftrace_raw_reg_event_foo() to its caller ftrace_event_enable_disable(). This avoids each regfunc trace event callbacks to handle a same error report that can be carried from the caller. See how much space this saves: text data bss dec hex filename 5345151 1961864 7103260 14410275 dbe223 vmlinux.o.old 5331487 1961864 7103260 14396611 dbacc3 vmlinux.o Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Jason Baron LKML-Reference: <4B1DC4AC.802@cn.fujitsu.com> [start cmdline record before calling regfunc to avoid lost window of pid to comm resolution] Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 16 ++-------------- kernel/trace/trace_events.c | 20 +++++++++++++++----- kernel/trace/trace_syscalls.c | 10 ++-------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 2af2f7a2c1b..0c21af85211 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -555,13 +555,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * * static int ftrace_reg_event_(struct ftrace_event_call *unused) * { - * int ret; - * - * ret = register_trace_(ftrace_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; + * return register_trace_(ftrace_event_); * } * * static void ftrace_unreg_event_(struct ftrace_event_call *unused) @@ -710,13 +704,7 @@ static void ftrace_raw_event_##call(proto) \ \ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ - int ret; \ - \ - ret = register_trace_##call(ftrace_raw_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to %s\n", #call); \ - return ret; \ + return register_trace_##call(ftrace_raw_event_##call); \ } \ \ static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 97b0b3aa166..189b09baf4f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -118,9 +118,11 @@ 
int trace_event_raw_init(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_event_raw_init); -static void ftrace_event_enable_disable(struct ftrace_event_call *call, +static int ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { + int ret = 0; + switch (enable) { case 0: if (call->enabled) { @@ -131,12 +133,20 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, break; case 1: if (!call->enabled) { - call->enabled = 1; tracing_start_cmdline_record(); - call->regfunc(call); + ret = call->regfunc(call); + if (ret) { + tracing_stop_cmdline_record(); + pr_info("event trace: Could not enable event " + "%s\n", call->name); + break; + } + call->enabled = 1; } break; } + + return ret; } static void ftrace_clear_events(void) @@ -415,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, case 0: case 1: mutex_lock(&event_mutex); - ftrace_event_enable_disable(call, val); + ret = ftrace_event_enable_disable(call, val); mutex_unlock(&event_mutex); break; @@ -425,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, *ppos += cnt; - return cnt; + return ret ? ret : cnt; } static ssize_t diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b957edd0ca3..75289f372dd 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -325,10 +325,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) mutex_lock(&syscall_trace_lock); if (!sys_refcount_enter) ret = register_trace_sys_enter(ftrace_syscall_enter); - if (ret) { - pr_info("event trace: Could not activate" - "syscall entry trace point"); - } else { + if (!ret) { set_bit(num, enabled_enter_syscalls); sys_refcount_enter++; } @@ -362,10 +359,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) mutex_lock(&syscall_trace_lock); if (!sys_refcount_exit) ret = register_trace_sys_exit(ftrace_syscall_exit); - if (ret) { - pr_info("event trace: Could not activate" - "syscall exit trace point"); - } else { + if (!ret) { set_bit(num, enabled_exit_syscalls); sys_refcount_exit++; } From 311d16da575f53c3367099579736c1d233efe0dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:15:11 +0800 Subject: [PATCH 10/20] ftrace: Return EINVAL when writing invalid val to set_ftrace_filter Currently it doesn't warn the user on an invalid value: # echo nonexist_symbol > set_ftrace_filter or: # echo 'nonexist_symbol:mod:fuse' > set_ftrace_filter Better make it return failure. Signed-off-by: Li Zefan Acked-by: Steven Rostedt LKML-Reference: <4B1DC4BF.2070003@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/ftrace.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e51a1bcb7be..08a3fb5b318 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1724,7 +1724,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type) return ftrace_match(str, regex, len, type); } -static void ftrace_match_records(char *buff, int len, int enable) +static int ftrace_match_records(char *buff, int len, int enable) { unsigned int search_len; struct ftrace_page *pg; @@ -1733,6 +1733,7 @@ static void ftrace_match_records(char *buff, int len, int enable) char *search; int type; int not; + int found = 0; flag = enable ?
FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; type = filter_parse_regex(buff, len, &search, ¬); @@ -1750,6 +1751,7 @@ static void ftrace_match_records(char *buff, int len, int enable) rec->flags &= ~flag; else rec->flags |= flag; + found = 1; } /* * Only enable filtering if we have a function that @@ -1759,6 +1761,8 @@ static void ftrace_match_records(char *buff, int len, int enable) ftrace_filtered = 1; } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); + + return found; } static int @@ -1780,7 +1784,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod, return 1; } -static void ftrace_match_module_records(char *buff, char *mod, int enable) +static int ftrace_match_module_records(char *buff, char *mod, int enable) { unsigned search_len = 0; struct ftrace_page *pg; @@ -1789,6 +1793,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable) char *search = buff; unsigned long flag; int not = 0; + int found = 0; flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; @@ -1819,12 +1824,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable) rec->flags &= ~flag; else rec->flags |= flag; + found = 1; } if (enable && (rec->flags & FTRACE_FL_FILTER)) ftrace_filtered = 1; } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); + + return found; } /* @@ -1853,8 +1861,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) if (!strlen(mod)) return -EINVAL; - ftrace_match_module_records(func, mod, enable); - return 0; + if (ftrace_match_module_records(func, mod, enable)) + return 0; + return -EINVAL; } static struct ftrace_func_command ftrace_mod_cmd = { @@ -2151,8 +2160,9 @@ static int ftrace_process_regex(char *buff, int len, int enable) func = strsep(&next, ":"); if (!next) { - ftrace_match_records(func, len, enable); - return 0; + if (ftrace_match_records(func, len, enable)) + return 0; + return ret; } /* command found */ From 313254a9400d388b46150c0f355e216418a2f598 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:15:30 +0800 Subject: [PATCH 11/20] ftrace: Call trace_parser_clear() properly I found a weird behavior: # echo 'fuse:*' > set_ftrace_filter bash: echo: write error: Invalid argument # cat set_ftrace_filter fuse_dev_fasync fuse_dev_poll fuse_copy_do We should call trace_parser_clear() no matter ftrace_process_regex() returns 0 or -errno, otherwise we will actually take the unaccepted records from ftrace_regex_release(). Signed-off-by: Li Zefan Acked-by: Steven Rostedt LKML-Reference: <4B1DC4D2.3000406@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/ftrace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 08a3fb5b318..ff8aecdc6dd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2208,10 +2208,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, !trace_parser_cont(parser)) { ret = ftrace_process_regex(parser->buffer, parser->idx, enable); + trace_parser_clear(parser); if (ret) goto out_unlock; - - trace_parser_clear(parser); } ret = read; From 91baf6285be7282cfa487de92f836c50749dffb9 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:15:45 +0800 Subject: [PATCH 12/20] function-graph: Allow writing the same val to set_graph_function # echo 'do_open' > set_graph_function # echo 'do_open' >> set_graph_function bash: echo: write error: Invalid argument Make it valid to write the same value to set_graph_function, which is consistent with set_ftrace_filter interface. 
Signed-off-by: Li Zefan Acked-by: Steven Rostedt LKML-reference: <4B1DC4E1.1060303@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/ftrace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ff8aecdc6dd..7968762c816 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2552,10 +2552,9 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) exists = true; break; } - if (!exists) { + if (!exists) array[(*idx)++] = rec->ip; - found = 1; - } + found = 1; } } while_for_each_ftrace_rec(); From fdb372ed4cadbfe9dbba0e932a77d0523682e690 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:15:59 +0800 Subject: [PATCH 13/20] tracing: Use seq file for trace_options Code simplification for reading trace_options. Signed-off-by: Li Zefan Acked-by: Steven Rostedt LKML-reference: <4B1DC4EF.3090106@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace.c | 60 +++++++++++++------------------------------- 1 file changed, 17 insertions(+), 43 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 88bd9ae2a9e..a6c41cc6328 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2316,67 +2316,32 @@ static const struct file_operations tracing_cpumask_fops = { .write = tracing_cpumask_write, }; -static ssize_t -tracing_trace_options_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) +static int tracing_trace_options_show(struct seq_file *m, void *v) { struct tracer_opt *trace_opts; u32 tracer_flags; - int len = 0; - char *buf; - int r = 0; int i; - - /* calculate max size */ - for (i = 0; trace_options[i]; i++) { - len += strlen(trace_options[i]); - len += 3; /* "no" and newline */ - } - mutex_lock(&trace_types_lock); tracer_flags = current_trace->flags->val; trace_opts = current_trace->flags->opts; - /* - * Increase the size with names of options specific - * of the current tracer. 
- */ - for (i = 0; trace_opts[i].name; i++) { - len += strlen(trace_opts[i].name); - len += 3; /* "no" and newline */ - } - - /* +1 for \0 */ - buf = kmalloc(len + 1, GFP_KERNEL); - if (!buf) { - mutex_unlock(&trace_types_lock); - return -ENOMEM; - } - for (i = 0; trace_options[i]; i++) { if (trace_flags & (1 << i)) - r += sprintf(buf + r, "%s\n", trace_options[i]); + seq_printf(m, "%s\n", trace_options[i]); else - r += sprintf(buf + r, "no%s\n", trace_options[i]); + seq_printf(m, "no%s\n", trace_options[i]); } for (i = 0; trace_opts[i].name; i++) { if (tracer_flags & trace_opts[i].bit) - r += sprintf(buf + r, "%s\n", - trace_opts[i].name); + seq_printf(m, "%s\n", trace_opts[i].name); else - r += sprintf(buf + r, "no%s\n", - trace_opts[i].name); + seq_printf(m, "no%s\n", trace_opts[i].name); } mutex_unlock(&trace_types_lock); - WARN_ON(r >= len + 1); - - r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); - - kfree(buf); - return r; + return 0; } /* Try to assign a tracer specific option */ @@ -2471,9 +2436,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, return cnt; } +static int tracing_trace_options_open(struct inode *inode, struct file *file) +{ + if (tracing_disabled) + return -ENODEV; + return single_open(file, tracing_trace_options_show, NULL); +} + static const struct file_operations tracing_iter_fops = { - .open = tracing_open_generic, - .read = tracing_trace_options_read, + .open = tracing_trace_options_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, .write = tracing_trace_options_write, }; From 13f16d209161c95e92aef40e350cc6cf56ac440b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:16:11 +0800 Subject: [PATCH 14/20] tracing: Use seq file for trace_clock The buffer for the output is as small as 64 bytes, so it'll overflow if we add more clock type. Use seq file instead. Signed-off-by: Li Zefan Acked-by: Steven Rostedt LKML-Reference: <4B1DC4FB.5030407@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a6c41cc6328..886268e0d8e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3366,21 +3366,18 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, return cnt; } -static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) +static int tracing_clock_show(struct seq_file *m, void *v) { - char buf[64]; - int bufiter = 0; int i; for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) - bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, + seq_printf(m, "%s%s%s%s", i ? " " : "", i == trace_clock_id ? "[" : "", trace_clocks[i].name, i == trace_clock_id ? 
"]" : ""); - bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); + seq_putc(m, '\n'); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter); + return 0; } static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, @@ -3422,6 +3419,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, return cnt; } +static int tracing_clock_open(struct inode *inode, struct file *file) +{ + if (tracing_disabled) + return -ENODEV; + return single_open(file, tracing_clock_show, NULL); +} + static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -3460,8 +3464,10 @@ static const struct file_operations tracing_mark_fops = { }; static const struct file_operations trace_clock_fops = { - .open = tracing_open_generic, - .read = tracing_clock_read, + .open = tracing_clock_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, .write = tracing_clock_write, }; From 2cbafd68b826f8e0471875cf33cdfb8a1478aef1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:16:26 +0800 Subject: [PATCH 15/20] tracing: Remove useless trace option Since commit 4d9493c90f8e6e1b164aede3814010a290161abb ("ftrace: remove add-hoc code"), option "sched-tree" has become useless. Signed-off-by: Li Zefan Acked-by: Steven Rostedt LKML-Reference: <4B1DC50A.7040402@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace.c | 1 - kernel/trace/trace.h | 23 +++++++++++------------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 886268e0d8e..898409d6042 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -313,7 +313,6 @@ static const char *trace_options[] = { "bin", "block", "stacktrace", - "sched-tree", "trace_printk", "ftrace_preempt", "branch", diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7fa33cab696..1b18cb240c1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -597,18 +597,17 @@ enum trace_iterator_flags { TRACE_ITER_BIN = 0x40, TRACE_ITER_BLOCK = 0x80, TRACE_ITER_STACKTRACE = 0x100, - TRACE_ITER_SCHED_TREE = 0x200, - TRACE_ITER_PRINTK = 0x400, - TRACE_ITER_PREEMPTONLY = 0x800, - TRACE_ITER_BRANCH = 0x1000, - TRACE_ITER_ANNOTATE = 0x2000, - TRACE_ITER_USERSTACKTRACE = 0x4000, - TRACE_ITER_SYM_USEROBJ = 0x8000, - TRACE_ITER_PRINTK_MSGONLY = 0x10000, - TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ - TRACE_ITER_LATENCY_FMT = 0x40000, - TRACE_ITER_SLEEP_TIME = 0x80000, - TRACE_ITER_GRAPH_TIME = 0x100000, + TRACE_ITER_PRINTK = 0x200, + TRACE_ITER_PREEMPTONLY = 0x400, + TRACE_ITER_BRANCH = 0x800, + TRACE_ITER_ANNOTATE = 0x1000, + TRACE_ITER_USERSTACKTRACE = 0x2000, + TRACE_ITER_SYM_USEROBJ = 0x4000, + TRACE_ITER_PRINTK_MSGONLY = 0x8000, + TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */ + TRACE_ITER_LATENCY_FMT = 0x20000, + TRACE_ITER_SLEEP_TIME = 0x40000, + TRACE_ITER_GRAPH_TIME = 0x80000, }; /* From 8d18eaaff5acaa58369be342c86e607643ce10c7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:17:06 +0800 Subject: [PATCH 16/20] tracing: Simplify trace_option_write() - remove duplicate code inside trace_options_write() - extract duplicate code in trace_options_write() and set_tracer_option() Signed-off-by: Li Zefan Acked-by: Steven Rostedt LKML-Reference: <4B1DC532.9010802@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace.c | 95 +++++++++++++++++--------------------------- 1 file changed, 36 
insertions(+), 59 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 898409d6042..05076008f37 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2343,29 +2343,13 @@ static int tracing_trace_options_show(struct seq_file *m, void *v) return 0; } -/* Try to assign a tracer specific option */ -static int set_tracer_option(struct tracer *trace, char *cmp, int neg) +static int __set_tracer_option(struct tracer *trace, + struct tracer_flags *tracer_flags, + struct tracer_opt *opts, int neg) { - struct tracer_flags *tracer_flags = trace->flags; - struct tracer_opt *opts = NULL; - int ret = 0, i = 0; - int len; + int ret; - for (i = 0; tracer_flags->opts[i].name; i++) { - opts = &tracer_flags->opts[i]; - len = strlen(opts->name); - - if (strncmp(cmp, opts->name, len) == 0) { - ret = trace->set_flag(tracer_flags->val, - opts->bit, !neg); - break; - } - } - /* Not found */ - if (!tracer_flags->opts[i].name) - return -EINVAL; - - /* Refused to handle */ + ret = trace->set_flag(tracer_flags->val, opts->bit, !neg); if (ret) return ret; @@ -2373,10 +2357,27 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg) tracer_flags->val &= ~opts->bit; else tracer_flags->val |= opts->bit; - return 0; } +/* Try to assign a tracer specific option */ +static int set_tracer_option(struct tracer *trace, char *cmp, int neg) +{ + struct tracer_flags *tracer_flags = trace->flags; + struct tracer_opt *opts = NULL; + int i; + + for (i = 0; tracer_flags->opts[i].name; i++) { + opts = &tracer_flags->opts[i]; + + if (strcmp(cmp, opts->name) == 0) + return __set_tracer_option(trace, trace->flags, + opts, neg); + } + + return -EINVAL; +} + static void set_tracer_flags(unsigned int mask, int enabled) { /* do nothing if flag is already set */ @@ -2394,7 +2395,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; - char *cmp = buf; + char *cmp; int neg = 0; int ret; int i; @@ -2406,16 +2407,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, return -EFAULT; buf[cnt] = 0; + cmp = strstrip(buf); - if (strncmp(buf, "no", 2) == 0) { + if (strncmp(cmp, "no", 2) == 0) { neg = 1; cmp += 2; } for (i = 0; trace_options[i]; i++) { - int len = strlen(trace_options[i]); - - if (strncmp(cmp, trace_options[i], len) == 0) { + if (strcmp(cmp, trace_options[i]) == 0) { set_tracer_flags(1 << i, !neg); break; } @@ -3927,39 +3927,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, if (ret < 0) return ret; - ret = 0; - switch (val) { - case 0: - /* do nothing if already cleared */ - if (!(topt->flags->val & topt->opt->bit)) - break; - - mutex_lock(&trace_types_lock); - if (current_trace->set_flag) - ret = current_trace->set_flag(topt->flags->val, - topt->opt->bit, 0); - mutex_unlock(&trace_types_lock); - if (ret) - return ret; - topt->flags->val &= ~topt->opt->bit; - break; - case 1: - /* do nothing if already set */ - if (topt->flags->val & topt->opt->bit) - break; - - mutex_lock(&trace_types_lock); - if (current_trace->set_flag) - ret = current_trace->set_flag(topt->flags->val, - topt->opt->bit, 1); - mutex_unlock(&trace_types_lock); - if (ret) - return ret; - topt->flags->val |= topt->opt->bit; - break; - - default: + if (val != 0 && val != 1) return -EINVAL; + + if (!!(topt->flags->val & topt->opt->bit) != val) { + mutex_lock(&trace_types_lock); + ret = __set_tracer_option(current_trace, topt->flags, + topt->opt, val); + mutex_unlock(&trace_types_lock); + if (ret) + 
return ret; } *ppos += cnt; From e00bf2ec60605eb95687b7a0c3b83c87c48541dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:17:29 +0800 Subject: [PATCH 17/20] tracing: Change event->profile_count to be int type Like total_profile_count, struct ftrace_event_call::profile_count is protected by event_mutex, so it doesn't need to be atomic_t. Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Jason Baron Cc: Masami Hiramatsu Cc: Peter Zijlstra LKML-Reference: <4B1DC549.5010705@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 2 +- include/linux/syscalls.h | 2 -- include/trace/ftrace.h | 1 - kernel/trace/trace_event_profile.c | 6 +++--- kernel/trace/trace_kprobe.c | 1 - 5 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index db97c64ce0e..2233c98d80d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -131,7 +131,7 @@ struct ftrace_event_call { void *mod; void *data; - atomic_t profile_count; + int profile_count; int (*profile_enable)(struct ftrace_event_call *); void (*profile_disable)(struct ftrace_event_call *); }; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 94ac28437be..72d69860d90 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -102,12 +102,10 @@ struct perf_event_attr; #ifdef CONFIG_EVENT_PROFILE #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ - .profile_count = ATOMIC_INIT(-1), \ .profile_enable = prof_sysenter_enable, \ .profile_disable = prof_sysenter_disable, #define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ - .profile_count = ATOMIC_INIT(-1), \ .profile_enable = prof_sysexit_enable, \ .profile_disable = prof_sysexit_disable, #else diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0c21af85211..73523151a73 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -629,7 +629,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #ifdef CONFIG_EVENT_PROFILE #define _TRACE_PROFILE_INIT(call) \ - .profile_count = ATOMIC_INIT(-1), \ .profile_enable = ftrace_profile_enable_##call, \ .profile_disable = ftrace_profile_disable_##call, diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index d9c60f80aa0..9e25573242c 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) char *buf; int ret = -ENOMEM; - if (atomic_inc_return(&event->profile_count)) + if (event->profile_count++ > 0) return 0; if (!total_profile_count) { @@ -56,7 +56,7 @@ fail_buf_nmi: perf_trace_buf = NULL; } fail_buf: - atomic_dec(&event->profile_count); + event->profile_count--; return ret; } @@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) { char *buf, *nmi_buf; - if (!atomic_add_negative(-1, &event->profile_count)) + if (--event->profile_count > 0) return; event->profile_disable(event); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index e3c80e92589..6ed223447a3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1426,7 +1426,6 @@ static int register_probe_event(struct trace_probe *tp) call->unregfunc = probe_event_disable; #ifdef CONFIG_EVENT_PROFILE - atomic_set(&call->profile_count, -1); call->profile_enable = probe_profile_enable; call->profile_disable = probe_profile_disable; #endif From 472bbe02c92a7a8299d7b16946277d98bb8f4bb7 
Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:17:51 +0800 Subject: [PATCH 18/20] tracing/power: Remove two exports trace_power_start and trace_power_end are used in arch/x86/kernel/power.c, and this file can't be compiled as a module, so these two tracepoints don't need to be exported. Signed-off-by: Li Zefan Acked-by: Arjan van de Ven Acked-by: Steven Rostedt LKML-Reference: <4B1DC55F.7060305@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/power-traces.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index e06c6e3d56a..9f4f565b01e 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -14,7 +14,5 @@ #define CREATE_TRACE_POINTS #include -EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); -EXPORT_TRACEPOINT_SYMBOL_GPL(power_end); EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); From 16620e0f1990fa6d896a639449c4b3d678458464 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:18:16 +0800 Subject: [PATCH 19/20] ksym_tracer: Fix bad cast Fix this warning: kernel/trace/trace_ksym.c: In function 'ksym_trace_filter_read': kernel/trace/trace_ksym.c:239: warning: cast to pointer from integer of different size Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: "K.Prasad" LKML-Reference: <4B1DC578.9020909@cn.fujitsu.com> [remove the strstrip fix as tglx already fixed that] Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_ksym.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 64e7a5bd669..48f1c6c248c 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -235,7 +235,8 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr); + ret = trace_seq_printf(s, "%pS:", + (void *)(unsigned long)entry->attr.bp_addr); if (entry->attr.bp_type == HW_BREAKPOINT_R) ret = trace_seq_puts(s, "r--\n"); else if (entry->attr.bp_type == HW_BREAKPOINT_W) @@ -298,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file, * 2: echo 0 > ksym_trace_filter * 3: echo "*:---" > ksym_trace_filter */ - if (!input_string[0] || !strcmp(input_string, "0") || - !strcmp(input_string, "*:---")) { + if (!buf[0] || !strcmp(buf, "0") || + !strcmp(buf, "*:---")) { __ksym_trace_reset(); ret = 0; goto out; From e36c54582c6f14adc9e10473e2aec2cc4f0acc03 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Dec 2009 15:58:33 -0500 Subject: [PATCH 20/20] tracing: Fix return of trace_dump_stack() The trace_dump_stack() returned a value for a void function. Also, added the missing stub for trace_dump_stack() when tracing is not configured. Reported-by: Ingo Molnar LKML-Reference: <20091214162713.GA31060@elte.hu> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 1 + kernel/trace/trace.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5ad4199fb07..f1dc752da0d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -527,6 +527,7 @@ trace_printk(const char *fmt, ...) 
__attribute__ ((format (printf, 1, 2))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } +static inline void trace_dump_stack(void) { } static inline int trace_printk(const char *fmt, ...) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bd7b969a729..ee61915935d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1158,7 +1158,7 @@ void trace_dump_stack(void) unsigned long flags; if (tracing_disabled || tracing_selftest_running) - return 0; + return; local_save_flags(flags);
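
Usage note for the trace_dump_stack() interface added in PATCH 04 and fixed up in PATCH 20: it can be dropped into almost any kernel code path, much like trace_printk(). The sketch below is illustrative only and is not part of the series; the helper function and its message are hypothetical, while trace_printk() and trace_dump_stack() are the interfaces declared in include/linux/kernel.h by the patches above.

#include <linux/kernel.h>	/* trace_printk(), trace_dump_stack() */

/* Hypothetical debugging helper -- a minimal sketch, not from the series. */
static void debug_unexpected_state(int state)
{
	/*
	 * Record the condition and the call chain that led here in the
	 * ftrace ring buffer rather than on the console, so this is
	 * usable in hot or latency-sensitive paths.  The output can be
	 * read back from the trace file in the tracing debugfs directory.
	 */
	trace_printk("unexpected state %d\n", state);
	trace_dump_stack();
}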