Merge branch 'tracing/ftrace' into auto-ftrace-next

This commit is contained in:
Ingo Molnar 2008-07-10 11:43:00 +02:00
commit bac0c9103b
89 changed files with 8834 additions and 115 deletions

View file

@ -528,6 +528,10 @@ KBUILD_CFLAGS += -g
KBUILD_AFLAGS += -gdwarf-2
endif
ifdef CONFIG_FTRACE
KBUILD_CFLAGS += -pg
endif
# We trigger additional mismatches with less inlining
ifdef CONFIG_DEBUG_SECTION_MISMATCH
KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)

View file

@ -14,6 +14,8 @@ config ARM
select HAVE_OPROFILE
select HAVE_KPROBES if (!XIP_KERNEL)
select HAVE_KRETPROBES if (HAVE_KPROBES)
select HAVE_FTRACE if (!XIP_KERNEL)
select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE)
help
The ARM series is a line of low-power-consumption RISC chip designs
licensed by ARM Ltd and targeted at embedded applications and

View file

@ -69,6 +69,12 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \
head.o misc.o $(OBJS)
ifeq ($(CONFIG_FTRACE),y)
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
endif
EXTRA_CFLAGS := -fpic -fno-builtin
EXTRA_AFLAGS :=

View file

@ -4,6 +4,10 @@
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
ifdef CONFIG_DYNAMIC_FTRACE
CFLAGS_REMOVE_ftrace.o = -pg
endif
# Object file lists.
obj-y := compat.o entry-armv.o entry-common.o irq.o \
@ -18,6 +22,7 @@ obj-$(CONFIG_ARTHUR) += arthur.o
obj-$(CONFIG_ISA_DMA) += dma-isa.o
obj-$(CONFIG_PCI) += bios32.o isa.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
obj-$(CONFIG_ATAGS_PROC) += atags.o

View file

@ -18,6 +18,7 @@
#include <asm/io.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ftrace.h>
/*
* libgcc functions - functions that are used internally by the
@ -181,3 +182,7 @@ EXPORT_SYMBOL(_find_next_bit_be);
#endif
EXPORT_SYMBOL(copy_page);
#ifdef CONFIG_FTRACE
EXPORT_SYMBOL(mcount);
#endif

View file

@ -9,6 +9,7 @@
*/
#include <asm/unistd.h>
#include <asm/ftrace.h>
#include <asm/arch/entry-macro.S>
#include "entry-header.S"
@ -99,6 +100,56 @@ ENTRY(ret_from_fork)
#undef CALL
#define CALL(x) .long x
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
stmdb sp!, {r0-r3, lr}
mov r0, lr
sub r0, r0, #MCOUNT_INSN_SIZE
.globl mcount_call
mcount_call:
bl ftrace_stub
ldmia sp!, {r0-r3, pc}
ENTRY(ftrace_caller)
stmdb sp!, {r0-r3, lr}
ldr r1, [fp, #-4]
mov r0, lr
sub r0, r0, #MCOUNT_INSN_SIZE
.globl ftrace_call
ftrace_call:
bl ftrace_stub
ldmia sp!, {r0-r3, pc}
#else
ENTRY(mcount)
stmdb sp!, {r0-r3, lr}
ldr r0, =ftrace_trace_function
ldr r2, [r0]
adr r0, ftrace_stub
cmp r0, r2
bne trace
ldmia sp!, {r0-r3, pc}
trace:
ldr r1, [fp, #-4]
mov r0, lr
sub r0, r0, #MCOUNT_INSN_SIZE
mov lr, pc
mov pc, r2
ldmia sp!, {r0-r3, pc}
#endif /* CONFIG_DYNAMIC_FTRACE */
.globl ftrace_stub
ftrace_stub:
mov pc, lr
#endif /* CONFIG_FTRACE */
/*=============================================================================
* SWI handler
*-----------------------------------------------------------------------------

116
arch/arm/kernel/ftrace.c Normal file
View file

@ -0,0 +1,116 @@
/*
* Dynamic function tracing support.
*
* Copyright (C) 2008 Abhishek Sagar <sagar.abhishek@gmail.com>
*
* For licencing details, see COPYING.
*
* Defines low-level handling of mcount calls when the kernel
* is compiled with the -pg flag. When using dynamic ftrace, the
* mcount call-sites get patched lazily with NOP till they are
* enabled. All code mutation routines here take effect atomically.
*/
#include <linux/ftrace.h>
#include <asm/cacheflush.h>
#include <asm/ftrace.h>
#define PC_OFFSET 8
#define BL_OPCODE 0xeb000000
#define BL_OFFSET_MASK 0x00ffffff
static unsigned long bl_insn;
static const unsigned long NOP = 0xe1a00000; /* mov r0, r0 */
unsigned char *ftrace_nop_replace(void)
{
return (char *)&NOP;
}
/* construct a branch (BL) instruction to addr */
unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
{
long offset;
offset = (long)addr - (long)(pc + PC_OFFSET);
if (unlikely(offset < -33554432 || offset > 33554428)) {
/* Can't generate branches that far (from ARM ARM). Ftrace
* doesn't generate branches outside of kernel text.
*/
WARN_ON_ONCE(1);
return NULL;
}
offset = (offset >> 2) & BL_OFFSET_MASK;
bl_insn = BL_OPCODE | offset;
return (unsigned char *)&bl_insn;
}
int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
unsigned char *new_code)
{
unsigned long err = 0, replaced = 0, old, new;
old = *(unsigned long *)old_code;
new = *(unsigned long *)new_code;
__asm__ __volatile__ (
"1: ldr %1, [%2] \n"
" cmp %1, %4 \n"
"2: streq %3, [%2] \n"
" cmpne %1, %3 \n"
" movne %0, #2 \n"
"3:\n"
".section .fixup, \"ax\"\n"
"4: mov %0, #1 \n"
" b 3b \n"
".previous\n"
".section __ex_table, \"a\"\n"
" .long 1b, 4b \n"
" .long 2b, 4b \n"
".previous\n"
: "=r"(err), "=r"(replaced)
: "r"(pc), "r"(new), "r"(old), "0"(err), "1"(replaced)
: "memory");
if (!err && (replaced == old))
flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
return err;
}
int ftrace_update_ftrace_func(ftrace_func_t func)
{
int ret;
unsigned long pc, old;
unsigned char *new;
pc = (unsigned long)&ftrace_call;
memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(pc, (unsigned long)func);
ret = ftrace_modify_code(pc, (unsigned char *)&old, new);
return ret;
}
int ftrace_mcount_set(unsigned long *data)
{
unsigned long pc, old;
unsigned long *addr = data;
unsigned char *new;
pc = (unsigned long)&mcount_call;
memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(pc, *addr);
*addr = ftrace_modify_code(pc, (unsigned char *)&old, new);
return 0;
}
/* run from kstop_machine */
int __init ftrace_dyn_arch_init(void *data)
{
ftrace_mcount_set(data);
return 0;
}

View file

@ -274,7 +274,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
* for kretprobe handlers which should normally be interested in r0 only
* anyway.
*/
static void __attribute__((naked)) __kprobes kretprobe_trampoline(void)
void __naked __kprobes kretprobe_trampoline(void)
{
__asm__ __volatile__ (
"stmdb sp!, {r0 - r11} \n\t"

View file

@ -105,11 +105,13 @@ config ARCH_NO_VIRT_TO_BUS
config PPC
bool
default y
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_LMB
select HAVE_OPROFILE
config EARLY_PRINTK
bool

View file

@ -12,6 +12,18 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
ifdef CONFIG_FTRACE
# Do not trace early boot code
CFLAGS_REMOVE_cputable.o = -pg
CFLAGS_REMOVE_prom_init.o = -pg
ifdef CONFIG_DYNAMIC_FTRACE
# dynamic ftrace setup.
CFLAGS_REMOVE_ftrace.o = -pg
endif
endif
obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
init_task.o process.o systbl.o idle.o \
@ -78,6 +90,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
ifneq ($(CONFIG_PPC_INDIRECT_IO),y)

View file

@ -30,6 +30,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/ftrace.h>
#undef SHOW_SYSCALLS
#undef SHOW_SYSCALLS_TASK
@ -1035,3 +1036,129 @@ machine_check_in_rtas:
/* XXX load up BATs and panic */
#endif /* CONFIG_PPC_RTAS */
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
_GLOBAL(mcount)
_GLOBAL(_mcount)
stwu r1,-48(r1)
stw r3, 12(r1)
stw r4, 16(r1)
stw r5, 20(r1)
stw r6, 24(r1)
mflr r3
stw r7, 28(r1)
mfcr r5
stw r8, 32(r1)
stw r9, 36(r1)
stw r10,40(r1)
stw r3, 44(r1)
stw r5, 8(r1)
subi r3, r3, MCOUNT_INSN_SIZE
.globl mcount_call
mcount_call:
bl ftrace_stub
nop
lwz r6, 8(r1)
lwz r0, 44(r1)
lwz r3, 12(r1)
mtctr r0
lwz r4, 16(r1)
mtcr r6
lwz r5, 20(r1)
lwz r6, 24(r1)
lwz r0, 52(r1)
lwz r7, 28(r1)
lwz r8, 32(r1)
mtlr r0
lwz r9, 36(r1)
lwz r10,40(r1)
addi r1, r1, 48
bctr
_GLOBAL(ftrace_caller)
/* Based off of objdump optput from glibc */
stwu r1,-48(r1)
stw r3, 12(r1)
stw r4, 16(r1)
stw r5, 20(r1)
stw r6, 24(r1)
mflr r3
lwz r4, 52(r1)
mfcr r5
stw r7, 28(r1)
stw r8, 32(r1)
stw r9, 36(r1)
stw r10,40(r1)
stw r3, 44(r1)
stw r5, 8(r1)
subi r3, r3, MCOUNT_INSN_SIZE
.globl ftrace_call
ftrace_call:
bl ftrace_stub
nop
lwz r6, 8(r1)
lwz r0, 44(r1)
lwz r3, 12(r1)
mtctr r0
lwz r4, 16(r1)
mtcr r6
lwz r5, 20(r1)
lwz r6, 24(r1)
lwz r0, 52(r1)
lwz r7, 28(r1)
lwz r8, 32(r1)
mtlr r0
lwz r9, 36(r1)
lwz r10,40(r1)
addi r1, r1, 48
bctr
#else
_GLOBAL(mcount)
_GLOBAL(_mcount)
stwu r1,-48(r1)
stw r3, 12(r1)
stw r4, 16(r1)
stw r5, 20(r1)
stw r6, 24(r1)
mflr r3
lwz r4, 52(r1)
mfcr r5
stw r7, 28(r1)
stw r8, 32(r1)
stw r9, 36(r1)
stw r10,40(r1)
stw r3, 44(r1)
stw r5, 8(r1)
subi r3, r3, MCOUNT_INSN_SIZE
LOAD_REG_ADDR(r5, ftrace_trace_function)
lwz r5,0(r5)
mtctr r5
bctrl
nop
lwz r6, 8(r1)
lwz r0, 44(r1)
lwz r3, 12(r1)
mtctr r0
lwz r4, 16(r1)
mtcr r6
lwz r5, 20(r1)
lwz r6, 24(r1)
lwz r0, 52(r1)
lwz r7, 28(r1)
lwz r8, 32(r1)
mtlr r0
lwz r9, 36(r1)
lwz r10,40(r1)
addi r1, r1, 48
bctr
#endif
_GLOBAL(ftrace_stub)
blr
#endif /* CONFIG_MCOUNT */

View file

@ -31,6 +31,7 @@
#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/irqflags.h>
#include <asm/ftrace.h>
/*
* System calls.
@ -870,3 +871,67 @@ _GLOBAL(enter_prom)
ld r0,16(r1)
mtlr r0
blr
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
_GLOBAL(mcount)
_GLOBAL(_mcount)
/* Taken from output of objdump from lib64/glibc */
mflr r3
stdu r1, -112(r1)
std r3, 128(r1)
subi r3, r3, MCOUNT_INSN_SIZE
.globl mcount_call
mcount_call:
bl ftrace_stub
nop
ld r0, 128(r1)
mtlr r0
addi r1, r1, 112
blr
_GLOBAL(ftrace_caller)
/* Taken from output of objdump from lib64/glibc */
mflr r3
ld r11, 0(r1)
stdu r1, -112(r1)
std r3, 128(r1)
ld r4, 16(r11)
subi r3, r3, MCOUNT_INSN_SIZE
.globl ftrace_call
ftrace_call:
bl ftrace_stub
nop
ld r0, 128(r1)
mtlr r0
addi r1, r1, 112
_GLOBAL(ftrace_stub)
blr
#else
_GLOBAL(mcount)
blr
_GLOBAL(_mcount)
/* Taken from output of objdump from lib64/glibc */
mflr r3
ld r11, 0(r1)
stdu r1, -112(r1)
std r3, 128(r1)
ld r4, 16(r11)
subi r3, r3, MCOUNT_INSN_SIZE
LOAD_REG_ADDR(r5,ftrace_trace_function)
ld r5,0(r5)
ld r5,0(r5)
mtctr r5
bctrl
nop
ld r0, 128(r1)
mtlr r0
addi r1, r1, 112
_GLOBAL(ftrace_stub)
blr
#endif
#endif

View file

@ -0,0 +1,154 @@
/*
* Code for replacing ftrace calls with jumps.
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
*
* Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
*
*/
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/cacheflush.h>
#include <asm/ftrace.h>
static unsigned int ftrace_nop = 0x60000000;
#ifdef CONFIG_PPC32
# define GET_ADDR(addr) addr
#else
/* PowerPC64's functions are data that points to the functions */
# define GET_ADDR(addr) *(unsigned long *)addr
#endif
static unsigned int notrace ftrace_calc_offset(long ip, long addr)
{
return (int)(addr - ip);
}
notrace unsigned char *ftrace_nop_replace(void)
{
return (char *)&ftrace_nop;
}
notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static unsigned int op;
/*
* It would be nice to just use create_function_call, but that will
* update the code itself. Here we need to just return the
* instruction that is going to be modified, without modifying the
* code.
*/
addr = GET_ADDR(addr);
/* Set to "bl addr" */
op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffc);
/*
* No locking needed, this must be called via kstop_machine
* which in essence is like running on a uniprocessor machine.
*/
return (unsigned char *)&op;
}
#ifdef CONFIG_PPC64
# define _ASM_ALIGN " .align 3 "
# define _ASM_PTR " .llong "
#else
# define _ASM_ALIGN " .align 2 "
# define _ASM_PTR " .long "
#endif
notrace int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
unsigned replaced;
unsigned old = *(unsigned *)old_code;
unsigned new = *(unsigned *)new_code;
int faulted = 0;
/*
* Note: Due to modules and __init, code can
* disappear and change, we need to protect against faulting
* as well as code changing.
*
* No real locking needed, this code is run through
* kstop_machine.
*/
asm volatile (
"1: lwz %1, 0(%2)\n"
" cmpw %1, %5\n"
" bne 2f\n"
" stwu %3, 0(%2)\n"
"2:\n"
".section .fixup, \"ax\"\n"
"3: li %0, 1\n"
" b 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
_ASM_ALIGN "\n"
_ASM_PTR "1b, 3b\n"
".previous"
: "=r"(faulted), "=r"(replaced)
: "r"(ip), "r"(new),
"0"(faulted), "r"(old)
: "memory");
if (replaced != old && replaced != new)
faulted = 2;
if (!faulted)
flush_icache_range(ip, ip + 8);
return faulted;
}
notrace int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
unsigned char old[MCOUNT_INSN_SIZE], *new;
int ret;
memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, (unsigned long)func);
ret = ftrace_modify_code(ip, old, new);
return ret;
}
notrace int ftrace_mcount_set(unsigned long *data)
{
unsigned long ip = (long)(&mcount_call);
unsigned long *addr = data;
unsigned char old[MCOUNT_INSN_SIZE], *new;
/*
* Replace the mcount stub with a pointer to the
* ip recorder function.
*/
memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, *addr);
*addr = ftrace_modify_code(ip, old, new);
return 0;
}
int __init ftrace_dyn_arch_init(void *data)
{
/* This is running in kstop_machine */
ftrace_mcount_set(data);
return 0;
}

View file

@ -120,7 +120,8 @@ EXPORT_SYMBOL(_outsl_ns);
#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
void _memset_io(volatile void __iomem *addr, int c, unsigned long n)
notrace void
_memset_io(volatile void __iomem *addr, int c, unsigned long n)
{
void *p = (void __force *)addr;
u32 lc = c;

View file

@ -98,7 +98,7 @@ EXPORT_SYMBOL(irq_desc);
int distribute_irqs = 1;
static inline unsigned long get_hard_enabled(void)
static inline notrace unsigned long get_hard_enabled(void)
{
unsigned long enabled;
@ -108,13 +108,13 @@ static inline unsigned long get_hard_enabled(void)
return enabled;
}
static inline void set_soft_enabled(unsigned long enable)
static inline notrace void set_soft_enabled(unsigned long enable)
{
__asm__ __volatile__("stb %0,%1(13)"
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
}
void raw_local_irq_restore(unsigned long en)
notrace void raw_local_irq_restore(unsigned long en)
{
/*
* get_paca()->soft_enabled = en;

View file

@ -42,6 +42,7 @@
#include <asm/div64.h>
#include <asm/signal.h>
#include <asm/dcr.h>
#include <asm/ftrace.h>
#ifdef CONFIG_PPC32
extern void transfer_to_handler(void);
@ -67,6 +68,10 @@ EXPORT_SYMBOL(single_step_exception);
EXPORT_SYMBOL(sys_sigreturn);
#endif
#ifdef CONFIG_FTRACE
EXPORT_SYMBOL(_mcount);
#endif
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strcat);

View file

@ -81,7 +81,7 @@ int ucache_bsize;
* from the address that it was linked at, so we must use RELOC/PTRRELOC
* to access static data (including strings). -- paulus
*/
unsigned long __init early_init(unsigned long dt_ptr)
notrace unsigned long __init early_init(unsigned long dt_ptr)
{
unsigned long offset = reloc_offset();
struct cpu_spec *spec;
@ -111,7 +111,7 @@ unsigned long __init early_init(unsigned long dt_ptr)
* This is called very early on the boot process, after a minimal
* MMU environment has been set up but before MMU_init is called.
*/
void __init machine_init(unsigned long dt_ptr, unsigned long phys)
notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys)
{
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
@ -133,7 +133,7 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys)
#ifdef CONFIG_BOOKE_WDT
/* Checks wdt=x and wdt_period=xx command-line option */
int __init early_parse_wdt(char *p)
notrace int __init early_parse_wdt(char *p)
{
if (p && strncmp(p, "0", 1) != 0)
booke_wdt_enabled = 1;

View file

@ -1,5 +1,10 @@
CFLAGS_bootx_init.o += -fPIC
ifdef CONFIG_FTRACE
# Do not trace early boot code
CFLAGS_REMOVE_bootx_init.o = -pg
endif
obj-y += pic.o setup.o time.o feature.o pci.o \
sleep.o low_i2c.o cache.o pfunc_core.o \
pfunc_base.o

View file

@ -11,6 +11,8 @@ config SPARC
config SPARC64
bool
default y
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE
select HAVE_IDE
select HAVE_LMB
select HAVE_ARCH_KGDB

View file

@ -33,7 +33,7 @@ config DEBUG_PAGEALLOC
config MCOUNT
bool
depends on STACK_DEBUG
depends on STACK_DEBUG || FTRACE
default y
config FRAME_POINTER

View file

@ -14,6 +14,7 @@ obj-y := process.o setup.o cpu.o idprom.o \
power.o sbus.o sparc64_ksyms.o chmc.o \
visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_PCI) += ebus.o pci_common.o \
pci_psycho.o pci_sabre.o pci_schizo.o \

View file

@ -0,0 +1,94 @@
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/ftrace.h>
static const u32 ftrace_nop = 0x01000000;
notrace unsigned char *ftrace_nop_replace(void)
{
return (char *)&ftrace_nop;
}
notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static u32 call;
s32 off;
off = ((s32)addr - (s32)ip);
call = 0x40000000 | ((u32)off >> 2);
return (unsigned char *) &call;
}
notrace int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
u32 old = *(u32 *)old_code;
u32 new = *(u32 *)new_code;
u32 replaced;
int faulted;
__asm__ __volatile__(
"1: cas [%[ip]], %[old], %[new]\n"
" flush %[ip]\n"
" mov 0, %[faulted]\n"
"2:\n"
" .section .fixup,#alloc,#execinstr\n"
" .align 4\n"
"3: sethi %%hi(2b), %[faulted]\n"
" jmpl %[faulted] + %%lo(2b), %%g0\n"
" mov 1, %[faulted]\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
" .align 4\n"
" .word 1b, 3b\n"
" .previous\n"
: "=r" (replaced), [faulted] "=r" (faulted)
: [new] "0" (new), [old] "r" (old), [ip] "r" (ip)
: "memory");
if (replaced != old && replaced != new)
faulted = 2;
return faulted;
}
notrace int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
unsigned char old[MCOUNT_INSN_SIZE], *new;
memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, (unsigned long)func);
return ftrace_modify_code(ip, old, new);
}
notrace int ftrace_mcount_set(unsigned long *data)
{
unsigned long ip = (long)(&mcount_call);
unsigned long *addr = data;
unsigned char old[MCOUNT_INSN_SIZE], *new;
/*
* Replace the mcount stub with a pointer to the
* ip recorder function.
*/
memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, *addr);
*addr = ftrace_modify_code(ip, old, new);
return 0;
}
int __init ftrace_dyn_arch_init(void *data)
{
ftrace_mcount_set(data);
return 0;
}

View file

@ -53,6 +53,7 @@
#include <asm/ns87303.h>
#include <asm/timer.h>
#include <asm/cpudata.h>
#include <asm/ftrace.h>
struct poll {
int fd;
@ -111,8 +112,7 @@ EXPORT_SYMBOL(__write_trylock);
EXPORT_SYMBOL(smp_call_function);
#endif /* CONFIG_SMP */
#if defined(CONFIG_MCOUNT)
extern void _mcount(void);
#ifdef CONFIG_MCOUNT
EXPORT_SYMBOL(_mcount);
#endif

View file

@ -28,10 +28,13 @@ ovstack:
.skip OVSTACKSIZE
#endif
.text
.align 32
.globl mcount, _mcount
mcount:
.align 32
.globl _mcount
.type _mcount,#function
.globl mcount
.type mcount,#function
_mcount:
mcount:
#ifdef CONFIG_STACK_DEBUG
/*
* Check whether %sp is dangerously low.
@ -55,6 +58,53 @@ _mcount:
or %g3, %lo(panicstring), %o0
call prom_halt
nop
1:
#endif
1: retl
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
mov %o7, %o0
.globl mcount_call
mcount_call:
call ftrace_stub
mov %o0, %o7
#else
sethi %hi(ftrace_trace_function), %g1
sethi %hi(ftrace_stub), %g2
ldx [%g1 + %lo(ftrace_trace_function)], %g1
or %g2, %lo(ftrace_stub), %g2
cmp %g1, %g2
be,pn %icc, 1f
mov %i7, %o1
jmpl %g1, %g0
mov %o7, %o0
/* not reached */
1:
#endif
#endif
retl
nop
.size _mcount,.-_mcount
.size mcount,.-mcount
#ifdef CONFIG_FTRACE
.globl ftrace_stub
.type ftrace_stub,#function
ftrace_stub:
retl
nop
.size ftrace_stub,.-ftrace_stub
#ifdef CONFIG_DYNAMIC_FTRACE
.globl ftrace_caller
.type ftrace_caller,#function
ftrace_caller:
mov %i7, %o1
mov %o7, %o0
.globl ftrace_call
ftrace_call:
call ftrace_stub
mov %o0, %o7
retl
nop
.size ftrace_caller,.-ftrace_caller
#endif
#endif

View file

@ -23,6 +23,8 @@ config X86
select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER

View file

@ -172,6 +172,14 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
config PAGE_FAULT_HANDLERS
bool "Custom page fault handlers"
depends on DEBUG_KERNEL
help
Allow the use of custom page fault handlers. A kernel module may
register a function that is called on every page fault. Custom
handlers are used by some debugging and reverse engineering tools.
#
# IO delay types:
#

View file

@ -6,6 +6,13 @@ extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
ifdef CONFIG_FTRACE
# Do not profile debug utilities
CFLAGS_REMOVE_tsc_64.o = -pg
CFLAGS_REMOVE_tsc_32.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
endif
#
# vsyscalls (which work on the user stack) should have
# no stack-protector checks:
@ -56,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o

View file

@ -1,6 +1,6 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
#ifdef CONFIG_X86_64
extern char __vsyscall_0;
static inline const unsigned char*const * find_nop_table(void)
const unsigned char *const *find_nop_table(void)
{
return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@ -162,7 +162,7 @@ static const struct nop {
{ -1, NULL }
};
static const unsigned char*const * find_nop_table(void)
const unsigned char *const *find_nop_table(void)
{
const unsigned char *const *noptable = intel_nops;
int i;
@ -279,7 +279,7 @@ struct smp_alt_module {
struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
static DEFINE_MUTEX(smp_alt);
static int smp_mode = 1; /* protected by smp_alt */
void alternatives_smp_module_add(struct module *mod, char *name,
@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
__func__, smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
spin_lock(&smp_alt);
mutex_lock(&smp_alt);
list_add_tail(&smp->next, &smp_alt_modules);
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(smp->locks, smp->locks_end,
smp->text, smp->text_end);
spin_unlock(&smp_alt);
mutex_unlock(&smp_alt);
}
void alternatives_smp_module_del(struct module *mod)
@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
if (smp_alt_once || noreplace_smp)
return;
spin_lock(&smp_alt);
mutex_lock(&smp_alt);
list_for_each_entry(item, &smp_alt_modules, next) {
if (mod != item->mod)
continue;
list_del(&item->next);
spin_unlock(&smp_alt);
mutex_unlock(&smp_alt);
DPRINTK("%s: %s\n", __func__, item->name);
kfree(item);
return;
}
spin_unlock(&smp_alt);
mutex_unlock(&smp_alt);
}
void alternatives_smp_switch(int smp)
@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
spin_lock(&smp_alt);
mutex_lock(&smp_alt);
/*
* Avoid unnecessary switches because it forces JIT based VMs to
@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
mod->text, mod->text_end);
}
smp_mode = smp;
spin_unlock(&smp_alt);
mutex_unlock(&smp_alt);
}
#endif

View file

@ -51,6 +51,7 @@
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include "irq_vectors.h"
/*
@ -1110,6 +1111,77 @@ ENDPROC(xen_failsafe_callback)
#endif /* CONFIG_XEN */
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
pushl %eax
pushl %ecx
pushl %edx
movl 0xc(%esp), %eax
subl $MCOUNT_INSN_SIZE, %eax
.globl mcount_call
mcount_call:
call ftrace_stub
popl %edx
popl %ecx
popl %eax
ret
END(mcount)
ENTRY(ftrace_caller)
pushl %eax
pushl %ecx
pushl %edx
movl 0xc(%esp), %eax
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax
.globl ftrace_call
ftrace_call:
call ftrace_stub
popl %edx
popl %ecx
popl %eax
.globl ftrace_stub
ftrace_stub:
ret
END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
cmpl $ftrace_stub, ftrace_trace_function
jnz trace
.globl ftrace_stub
ftrace_stub:
ret
/* taken from glibc */
trace:
pushl %eax
pushl %ecx
pushl %edx
movl 0xc(%esp), %eax
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax
call *ftrace_trace_function
popl %edx
popl %ecx
popl %eax
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FTRACE */
.section .rodata,"a"
#include "syscall_table_32.S"

View file

@ -51,9 +51,115 @@
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
.code64
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
subq $0x38, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq 0x38(%rsp), %rdi
subq $MCOUNT_INSN_SIZE, %rdi
.globl mcount_call
mcount_call:
call ftrace_stub
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
addq $0x38, %rsp
retq
END(mcount)
ENTRY(ftrace_caller)
/* taken from glibc */
subq $0x38, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq 0x38(%rsp), %rdi
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
.globl ftrace_call
ftrace_call:
call ftrace_stub
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
addq $0x38, %rsp
.globl ftrace_stub
ftrace_stub:
retq
END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
.globl ftrace_stub
ftrace_stub:
retq
trace:
/* taken from glibc */
subq $0x38, %rsp
movq %rax, (%rsp)
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rsi, 24(%rsp)
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq 0x38(%rsp), %rdi
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
call *ftrace_trace_function
movq 48(%rsp), %r9
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
movq 24(%rsp), %rsi
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
addq $0x38, %rsp
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FTRACE */
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

141
arch/x86/kernel/ftrace.c Normal file
View file

@ -0,0 +1,141 @@
/*
* Code for replacing ftrace calls with jumps.
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
*
* Thanks goes to Ingo Molnar, for suggesting the idea.
* Mathieu Desnoyers, for suggesting postponing the modifications.
* Arjan van de Ven, for keeping me straight, and explaining to me
* the dangers of modifying code on the run.
*/
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/alternative.h>
#include <asm/ftrace.h>
/* Long is fine, even if it is only 4 bytes ;-) */
static long *ftrace_nop;
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
struct {
char e8;
int offset;
} __attribute__((packed));
};
static int notrace ftrace_calc_offset(long ip, long addr)
{
return (int)(addr - ip);
}
notrace unsigned char *ftrace_nop_replace(void)
{
return (char *)ftrace_nop;
}
notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
calc.e8 = 0xe8;
calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
/*
* No locking needed, this must be called via kstop_machine
* which in essence is like running on a uniprocessor machine.
*/
return calc.code;
}
notrace int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
unsigned replaced;
unsigned old = *(unsigned *)old_code; /* 4 bytes */
unsigned new = *(unsigned *)new_code; /* 4 bytes */
unsigned char newch = new_code[4];
int faulted = 0;
/*
* Note: Due to modules and __init, code can
* disappear and change, we need to protect against faulting
* as well as code changing.
*
* No real locking needed, this code is run through
* kstop_machine.
*/
asm volatile (
"1: lock\n"
" cmpxchg %3, (%2)\n"
" jnz 2f\n"
" movb %b4, 4(%2)\n"
"2:\n"
".section .fixup, \"ax\"\n"
"3: movl $1, %0\n"
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b, 3b)
: "=r"(faulted), "=a"(replaced)
: "r"(ip), "r"(new), "c"(newch),
"0"(faulted), "a"(old)
: "memory");
sync_core();
if (replaced != old && replaced != new)
faulted = 2;
return faulted;
}
notrace int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
unsigned char old[MCOUNT_INSN_SIZE], *new;
int ret;
memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, (unsigned long)func);
ret = ftrace_modify_code(ip, old, new);
return ret;
}
notrace int ftrace_mcount_set(unsigned long *data)
{
unsigned long ip = (long)(&mcount_call);
unsigned long *addr = data;
unsigned char old[MCOUNT_INSN_SIZE], *new;
/*
* Replace the mcount stub with a pointer to the
* ip recorder function.
*/
memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, *addr);
*addr = ftrace_modify_code(ip, old, new);
return 0;
}
int __init ftrace_dyn_arch_init(void *data)
{
const unsigned char *const *noptable = find_nop_table();
/* This is running in kstop_machine */
ftrace_mcount_set(data);
ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
return 0;
}

View file

@ -1,7 +1,14 @@
#include <linux/module.h>
#include <asm/checksum.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/ftrace.h>
#ifdef CONFIG_FTRACE
/* mcount is defined in assembly */
EXPORT_SYMBOL(mcount);
#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);

View file

@ -11,6 +11,8 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
unsigned long page_list[PAGES_NR];
void *control_page;
tracer_disable();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();

View file

@ -11,6 +11,8 @@
#include <linux/string.h>
#include <linux/reboot.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
unsigned long page_list[PAGES_NR];
void *control_page;
tracer_disable();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();

View file

@ -185,7 +185,10 @@ void cpu_idle(void)
local_irq_disable();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
/* Don't trace irqs off for idle */
stop_critical_timings();
idle();
start_critical_timings();
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();

View file

@ -165,7 +165,10 @@ void cpu_idle(void)
*/
local_irq_disable();
enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
idle();
start_critical_timings();
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
loops can be woken up without interrupt. */

View file

@ -42,7 +42,8 @@
#include <asm/topology.h>
#include <asm/vgtod.h>
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
#define __vsyscall(nr) \
__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
#define __syscall_clobber "r11","cx","memory"
/*

View file

@ -2,13 +2,20 @@
All C exports should go in the respective C files. */
#include <linux/module.h>
#include <net/checksum.h>
#include <linux/smp.h>
#include <net/checksum.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/ftrace.h>
#ifdef CONFIG_FTRACE
/* mcount is defined in assembly */
EXPORT_SYMBOL(mcount);
#endif
EXPORT_SYMBOL(kernel_thread);

View file

@ -5,6 +5,7 @@
obj-$(CONFIG_SMP) := msr-on-cpu.o
lib-y := delay_$(BITS).o
lib-y += thunk_$(BITS).o
lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
lib-y += memcpy_$(BITS).o

47
arch/x86/lib/thunk_32.S Normal file
View file

@ -0,0 +1,47 @@
/*
* Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
* Copyright 2008 by Steven Rostedt, Red Hat, Inc
* (inspired by Andi Kleen's thunk_64.S)
* Subject to the GNU public license, v.2. No warranty of any kind.
*/
#include <linux/linkage.h>
#define ARCH_TRACE_IRQS_ON \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call trace_hardirqs_on; \
popl %edx; \
popl %ecx; \
popl %eax;
#define ARCH_TRACE_IRQS_OFF \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call trace_hardirqs_off; \
popl %edx; \
popl %ecx; \
popl %eax;
#ifdef CONFIG_TRACE_IRQFLAGS
/* put return address in eax (arg1) */
.macro thunk_ra name,func
.globl \name
\name:
pushl %eax
pushl %ecx
pushl %edx
/* Place EIP in the arg1 */
movl 3*4(%esp), %eax
call \func
popl %edx
popl %ecx
popl %eax
ret
.endm
thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
#endif

View file

@ -2,6 +2,7 @@
* Save registers before calling assembly functions. This avoids
* disturbance of register allocation in some inline assembly constructs.
* Copyright 2001,2002 by Andi Kleen, SuSE Labs.
* Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
* Subject to the GNU public license, v.2. No warranty of any kind.
*/
@ -42,8 +43,22 @@
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
thunk trace_hardirqs_on_thunk,trace_hardirqs_on
thunk trace_hardirqs_off_thunk,trace_hardirqs_off
/* put return address in rdi (arg1) */
.macro thunk_ra name,func
.globl \name
\name:
CFI_STARTPROC
SAVE_ARGS
/* SAVE_ARGS pushs 9 elements */
/* the next element would be the rip */
movq 9*8(%rsp), %rdi
call \func
jmp restore
CFI_ENDPROC
.endm
thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC

View file

@ -49,6 +49,60 @@
#define PF_RSVD (1<<3)
#define PF_INSTR (1<<4)
#ifdef CONFIG_PAGE_FAULT_HANDLERS
static HLIST_HEAD(pf_handlers); /* protected by RCU */
static DEFINE_SPINLOCK(pf_handlers_writer);
void register_page_fault_handler(struct pf_handler *new_pfh)
{
unsigned long flags;
spin_lock_irqsave(&pf_handlers_writer, flags);
hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers);
spin_unlock_irqrestore(&pf_handlers_writer, flags);
}
EXPORT_SYMBOL_GPL(register_page_fault_handler);
/**
* unregister_page_fault_handler:
* The caller must ensure @old_pfh is not in use anymore before freeing it.
* This function does not guarantee it. The list of handlers is protected by
* RCU, so you can do this by e.g. calling synchronize_rcu().
*/
void unregister_page_fault_handler(struct pf_handler *old_pfh)
{
unsigned long flags;
spin_lock_irqsave(&pf_handlers_writer, flags);
hlist_del_rcu(&old_pfh->hlist);
spin_unlock_irqrestore(&pf_handlers_writer, flags);
}
EXPORT_SYMBOL_GPL(unregister_page_fault_handler);
#endif
/* returns non-zero if do_page_fault() should return */
static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
#ifdef CONFIG_PAGE_FAULT_HANDLERS
int ret = 0;
struct pf_handler *cur;
struct hlist_node *ncur;
if (hlist_empty(&pf_handlers))
return 0;
rcu_read_lock();
hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) {
ret = cur->handler(regs, error_code, address);
if (ret)
break;
}
rcu_read_unlock();
return ret;
#else
return 0;
#endif
}
static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
@ -606,6 +660,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (notify_page_fault(regs))
return;
if (handle_custom_pf(regs, error_code, address))
return;
/*
* We fault-in kernel-space virtual memory on-demand. The

View file

@ -710,6 +710,8 @@ void mark_rodata_ro(void)
unsigned long start = PFN_ALIGN(_text);
unsigned long size = PFN_ALIGN(_etext) - start;
#ifndef CONFIG_DYNAMIC_FTRACE
/* Dynamic tracing modifies the kernel text section */
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel text: %luk\n",
size >> 10);
@ -722,6 +724,8 @@ void mark_rodata_ro(void)
printk(KERN_INFO "Testing CPA: write protecting again\n");
set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
#endif
#endif /* CONFIG_DYNAMIC_FTRACE */
start += size;
size = (unsigned long)__end_rodata - start;
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);

View file

@ -767,6 +767,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
unsigned long rodata_start =
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
#ifdef CONFIG_DYNAMIC_FTRACE
/* Dynamic tracing modifies the kernel text section */
start = rodata_start;
#endif
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@ -776,8 +783,7 @@ void mark_rodata_ro(void)
* The rodata section (but not the kernel text!) should also be
* not-executable.
*/
start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
set_memory_nx(start, (end - start) >> PAGE_SHIFT);
set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
rodata_test();

View file

@ -23,7 +23,7 @@
#define gtod vdso_vsyscall_gtod_data
static long vdso_fallback_gettime(long clock, struct timespec *ts)
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
asm("syscall" : "=a" (ret) :
@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
return ret;
}
static inline long vgetns(void)
notrace static inline long vgetns(void)
{
long v;
cycles_t (*vread)(void);
@ -40,7 +40,7 @@ static inline long vgetns(void)
return (v * gtod->clock.mult) >> gtod->clock.shift;
}
static noinline int do_realtime(struct timespec *ts)
notrace static noinline int do_realtime(struct timespec *ts)
{
unsigned long seq, ns;
do {
@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
}
/* Copy of the version in kernel/time.c which we cannot directly access */
static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
notrace static void
vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
{
while (nsec >= NSEC_PER_SEC) {
nsec -= NSEC_PER_SEC;
@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
ts->tv_nsec = nsec;
}
static noinline int do_monotonic(struct timespec *ts)
notrace static noinline int do_monotonic(struct timespec *ts)
{
unsigned long seq, ns, secs;
do {
@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
return 0;
}
int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
if (likely(gtod->sysctl_enabled && gtod->clock.vread))
switch (clock) {
@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
long ret;
if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {

View file

@ -13,7 +13,8 @@
#include <asm/vgtod.h>
#include "vextern.h"
long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
notrace long
__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
unsigned int p;

14
include/asm-arm/ftrace.h Normal file
View file

@ -0,0 +1,14 @@
#ifndef _ASM_ARM_FTRACE
#define _ASM_ARM_FTRACE
#ifdef CONFIG_FTRACE
#define MCOUNT_ADDR ((long)(mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
#ifndef __ASSEMBLY__
extern void mcount(void);
#endif
#endif
#endif /* _ASM_ARM_FTRACE */

View file

@ -59,6 +59,7 @@ struct kprobe_ctlblk {
};
void arch_remove_kprobe(struct kprobe *);
void kretprobe_trampoline(void);
int kprobe_trap_handler(struct pt_regs *regs, unsigned int instr);
int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);

View file

@ -0,0 +1,14 @@
#ifndef _ASM_POWERPC_FTRACE
#define _ASM_POWERPC_FTRACE
#ifdef CONFIG_FTRACE
#define MCOUNT_ADDR ((long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
#ifndef __ASSEMBLY__
extern void _mcount(void);
#endif
#endif
#endif /* _ASM_POWERPC_FTRACE */

View file

@ -59,6 +59,11 @@ extern void iseries_handle_interrupts(void);
get_paca()->hard_enabled = 0; \
} while(0)
static inline int irqs_disabled_flags(unsigned long flags)
{
return flags == 0;
}
#else
#if defined(CONFIG_BOOKE)
@ -113,6 +118,11 @@ static inline void local_irq_save_ptr(unsigned long *flags)
#define hard_irq_enable() local_irq_enable()
#define hard_irq_disable() local_irq_disable()
static inline int irqs_disabled_flags(unsigned long flags)
{
return (flags & MSR_EE) == 0;
}
#endif /* CONFIG_PPC64 */
/*

View file

@ -0,0 +1,14 @@
#ifndef _ASM_SPARC64_FTRACE
#define _ASM_SPARC64_FTRACE
#ifdef CONFIG_MCOUNT
#define MCOUNT_ADDR ((long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
#ifndef __ASSEMBLY__
extern void _mcount(void);
#endif
#endif
#endif /* _ASM_SPARC64_FTRACE */

View file

@ -72,6 +72,8 @@ static inline void alternatives_smp_module_del(struct module *mod) {}
static inline void alternatives_smp_switch(int smp) {}
#endif /* CONFIG_SMP */
const unsigned char *const *find_nop_table(void);
/*
* Alternative instructions for different CPU types or capabilities.
*

14
include/asm-x86/ftrace.h Normal file
View file

@ -0,0 +1,14 @@
#ifndef _ASM_X86_FTRACE
#define _ASM_SPARC64_FTRACE
#ifdef CONFIG_FTRACE
#define MCOUNT_ADDR ((long)(mcount))
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
#ifndef __ASSEMBLY__
extern void mcount(void);
#endif
#endif /* CONFIG_FTRACE */
#endif /* _ASM_X86_FTRACE */

View file

@ -179,8 +179,6 @@ static inline void trace_hardirqs_fixup(void)
* have a reliable stack. x86_64 only.
*/
#define SWAPGS_UNSAFE_STACK swapgs
#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk
#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk
#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
@ -192,24 +190,6 @@ static inline void trace_hardirqs_fixup(void)
TRACE_IRQS_OFF;
#else
#define ARCH_TRACE_IRQS_ON \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call trace_hardirqs_on; \
popl %edx; \
popl %ecx; \
popl %eax;
#define ARCH_TRACE_IRQS_OFF \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call trace_hardirqs_off; \
popl %edx; \
popl %ecx; \
popl %eax;
#define ARCH_LOCKDEP_SYS_EXIT \
pushl %eax; \
pushl %ecx; \
@ -223,8 +203,8 @@ static inline void trace_hardirqs_fixup(void)
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON
# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF
# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
#else
# define TRACE_IRQS_ON
# define TRACE_IRQS_OFF

View file

@ -35,4 +35,13 @@ extern void show_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
struct pf_handler {
struct hlist_node hlist;
int (*handler)(struct pt_regs *regs, unsigned long error_code,
unsigned long address);
};
extern void register_page_fault_handler(struct pf_handler *new_pfh);
extern void unregister_page_fault_handler(struct pf_handler *old_pfh);
#endif

View file

@ -24,7 +24,8 @@ enum vsyscall_num {
((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
#define __section_vsyscall_clock __attribute__ \
((unused, __section__ (".vsyscall_clock"),aligned(16)))
#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn")))
#define __vsyscall_fn \
__attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
#define VGETCPU_RDTSCP 1
#define VGETCPU_LSL 2

143
include/linux/ftrace.h Normal file
View file

@ -0,0 +1,143 @@
#ifndef _LINUX_FTRACE_H
#define _LINUX_FTRACE_H
#ifdef CONFIG_FTRACE
#include <linux/linkage.h>
#include <linux/fs.h>
extern int ftrace_enabled;
extern int
ftrace_enable_sysctl(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos);
typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
struct ftrace_ops {
ftrace_func_t func;
struct ftrace_ops *next;
};
/*
* The ftrace_ops must be a static and should also
* be read_mostly. These functions do modify read_mostly variables
* so use them sparely. Never free an ftrace_op or modify the
* next pointer after it has been registered. Even after unregistering
* it, the next pointer may still be used internally.
*/
int register_ftrace_function(struct ftrace_ops *ops);
int unregister_ftrace_function(struct ftrace_ops *ops);
void clear_ftrace_function(void);
extern void ftrace_stub(unsigned long a0, unsigned long a1);
#else /* !CONFIG_FTRACE */
# define register_ftrace_function(ops) do { } while (0)
# define unregister_ftrace_function(ops) do { } while (0)
# define clear_ftrace_function(ops) do { } while (0)
#endif /* CONFIG_FTRACE */
#ifdef CONFIG_DYNAMIC_FTRACE
# define FTRACE_HASHBITS 10
# define FTRACE_HASHSIZE (1<<FTRACE_HASHBITS)
enum {
FTRACE_FL_FREE = (1 << 0),
FTRACE_FL_FAILED = (1 << 1),
FTRACE_FL_FILTER = (1 << 2),
FTRACE_FL_ENABLED = (1 << 3),
FTRACE_FL_NOTRACE = (1 << 4),
FTRACE_FL_CONVERTED = (1 << 5),
FTRACE_FL_FROZEN = (1 << 6),
};
struct dyn_ftrace {
struct hlist_node node;
unsigned long ip; /* address of mcount call-site */
unsigned long flags;
};
int ftrace_force_update(void);
void ftrace_set_filter(unsigned char *buf, int len, int reset);
/* defined in arch */
extern int ftrace_ip_converted(unsigned long ip);
extern unsigned char *ftrace_nop_replace(void);
extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
extern int ftrace_dyn_arch_init(void *data);
extern int ftrace_mcount_set(unsigned long *data);
extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code);
extern int ftrace_update_ftrace_func(ftrace_func_t func);
extern void ftrace_caller(void);
extern void ftrace_call(void);
extern void mcount_call(void);
extern int skip_trace(unsigned long ip);
void ftrace_disable_daemon(void);
void ftrace_enable_daemon(void);
#else
# define skip_trace(ip) ({ 0; })
# define ftrace_force_update() ({ 0; })
# define ftrace_set_filter(buf, len, reset) do { } while (0)
# define ftrace_disable_daemon() do { } while (0)
# define ftrace_enable_daemon() do { } while (0)
#endif /* CONFIG_DYNAMIC_FTRACE */
/* totally disable ftrace - can not re-enable after this */
void ftrace_kill(void);
static inline void tracer_disable(void)
{
#ifdef CONFIG_FTRACE
ftrace_enabled = 0;
#endif
}
#ifdef CONFIG_FRAME_POINTER
/* TODO: need to fix this for ARM */
# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6))
#else
# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
# define CALLER_ADDR1 0UL
# define CALLER_ADDR2 0UL
# define CALLER_ADDR3 0UL
# define CALLER_ADDR4 0UL
# define CALLER_ADDR5 0UL
# define CALLER_ADDR6 0UL
#endif
#ifdef CONFIG_IRQSOFF_TRACER
extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
#else
# define time_hardirqs_on(a0, a1) do { } while (0)
# define time_hardirqs_off(a0, a1) do { } while (0)
#endif
#ifdef CONFIG_PREEMPT_TRACER
extern void trace_preempt_on(unsigned long a0, unsigned long a1);
extern void trace_preempt_off(unsigned long a0, unsigned long a1);
#else
# define trace_preempt_on(a0, a1) do { } while (0)
# define trace_preempt_off(a0, a1) do { } while (0)
#endif
#ifdef CONFIG_TRACING
extern void
ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
#else
static inline void
ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
#endif
#endif /* _LINUX_FTRACE_H */

View file

@ -12,10 +12,10 @@
#define _LINUX_TRACE_IRQFLAGS_H
#ifdef CONFIG_TRACE_IRQFLAGS
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
extern void trace_softirqs_on(unsigned long ip);
extern void trace_softirqs_off(unsigned long ip);
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
# define trace_hardirq_context(p) ((p)->hardirq_context)
# define trace_softirq_context(p) ((p)->softirq_context)
# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
@ -41,6 +41,15 @@
# define INIT_TRACE_IRQFLAGS
#endif
#if defined(CONFIG_IRQSOFF_TRACER) || \
defined(CONFIG_PREEMPT_TRACER)
extern void stop_critical_timings(void);
extern void start_critical_timings(void);
#else
# define stop_critical_timings() do { } while (0)
# define start_critical_timings() do { } while (0)
#endif
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
#include <asm/irqflags.h>

View file

@ -259,6 +259,10 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
struct jprobe;
struct kretprobe;
static inline struct kprobe *get_kprobe(void *addr)
{
return NULL;
}
static inline struct kprobe *kprobe_running(void)
{
return NULL;

View file

@ -3,6 +3,8 @@
#include <asm/linkage.h>
#define notrace __attribute__((no_instrument_function))
#ifdef __cplusplus
#define CPP_ASMLINKAGE extern "C"
#else

View file

@ -44,8 +44,8 @@ struct marker {
*/
char state; /* Marker state. */
char ptype; /* probe type : 0 : single, 1 : multi */
void (*call)(const struct marker *mdata, /* Probe wrapper */
void *call_private, const char *fmt, ...);
/* Probe wrapper */
void (*call)(const struct marker *mdata, void *call_private, ...);
struct marker_probe_closure single;
struct marker_probe_closure *multi;
} __attribute__((aligned(8)));
@ -58,8 +58,12 @@ struct marker {
* Make sure the alignment of the structure in the __markers section will
* not add unwanted padding between the beginning of the section and the
* structure. Force alignment to the same alignment as the section start.
*
* The "generic" argument controls which marker enabling mechanism must be used.
* If generic is true, a variable read is used.
* If generic is false, immediate values are used.
*/
#define __trace_mark(name, call_private, format, args...) \
#define __trace_mark(generic, name, call_private, format, args...) \
do { \
static const char __mstrtab_##name[] \
__attribute__((section("__markers_strings"))) \
@ -72,15 +76,14 @@ struct marker {
__mark_check_format(format, ## args); \
if (unlikely(__mark_##name.state)) { \
(*__mark_##name.call) \
(&__mark_##name, call_private, \
format, ## args); \
(&__mark_##name, call_private, ## args);\
} \
} while (0)
extern void marker_update_probe_range(struct marker *begin,
struct marker *end);
#else /* !CONFIG_MARKERS */
#define __trace_mark(name, call_private, format, args...) \
#define __trace_mark(generic, name, call_private, format, args...) \
__mark_check_format(format, ## args)
static inline void marker_update_probe_range(struct marker *begin,
struct marker *end)
@ -88,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin,
#endif /* CONFIG_MARKERS */
/**
* trace_mark - Marker
* trace_mark - Marker using code patching
* @name: marker name, not quoted.
* @format: format string
* @args...: variable argument list
*
* Places a marker.
* Places a marker using optimized code patching technique (imv_read())
* to be enabled when immediate values are present.
*/
#define trace_mark(name, format, args...) \
__trace_mark(name, NULL, format, ## args)
__trace_mark(0, name, NULL, format, ## args)
/**
* _trace_mark - Marker using variable read
* @name: marker name, not quoted.
* @format: format string
* @args...: variable argument list
*
* Places a marker using a standard memory read (_imv_read()) to be
* enabled. Should be used for markers in code paths where instruction
* modification based enabling is not welcome. (__init and __exit functions,
* lockdep, some traps, printk).
*/
#define _trace_mark(name, format, args...) \
__trace_mark(1, name, NULL, format, ## args)
/**
* MARK_NOARGS - Format string for a marker with no argument.
@ -117,9 +135,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...)
extern marker_probe_func __mark_empty_function;
extern void marker_probe_cb(const struct marker *mdata,
void *call_private, const char *fmt, ...);
void *call_private, ...);
extern void marker_probe_cb_noarg(const struct marker *mdata,
void *call_private, const char *fmt, ...);
void *call_private, ...);
/*
* Connect a probe to a marker.

View file

@ -10,7 +10,7 @@
#include <linux/linkage.h>
#include <linux/list.h>
#ifdef CONFIG_DEBUG_PREEMPT
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
extern void add_preempt_count(int val);
extern void sub_preempt_count(int val);
#else
@ -52,6 +52,34 @@ do { \
preempt_check_resched(); \
} while (0)
/* For debugging and tracer internals only! */
#define add_preempt_count_notrace(val) \
do { preempt_count() += (val); } while (0)
#define sub_preempt_count_notrace(val) \
do { preempt_count() -= (val); } while (0)
#define inc_preempt_count_notrace() add_preempt_count_notrace(1)
#define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
#define preempt_disable_notrace() \
do { \
inc_preempt_count_notrace(); \
barrier(); \
} while (0)
#define preempt_enable_no_resched_notrace() \
do { \
barrier(); \
dec_preempt_count_notrace(); \
} while (0)
/* preempt_check_resched is OK to trace */
#define preempt_enable_notrace() \
do { \
preempt_enable_no_resched_notrace(); \
barrier(); \
preempt_check_resched(); \
} while (0)
#else
#define preempt_disable() do { } while (0)
@ -59,6 +87,10 @@ do { \
#define preempt_enable() do { } while (0)
#define preempt_check_resched() do { } while (0)
#define preempt_disable_notrace() do { } while (0)
#define preempt_enable_no_resched_notrace() do { } while (0)
#define preempt_enable_notrace() do { } while (0)
#endif
#ifdef CONFIG_PREEMPT_NOTIFIERS

View file

@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
extern void init_idle_bootup_task(struct task_struct *idle);
extern int runqueue_is_locked(void);
extern cpumask_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern int select_nohz_load_balancer(int cpu);
@ -2131,6 +2133,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm)
}
#endif
#ifdef CONFIG_TRACING
extern void
__trace_special(void *__tr, void *__data,
unsigned long arg1, unsigned long arg2, unsigned long arg3);
#else
static inline void
__trace_special(void *__tr, void *__data,
unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
}
#endif
extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
@ -2225,6 +2239,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
}
#endif /* CONFIG_MM_OWNER */
#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
#endif /* __KERNEL__ */
#endif

View file

@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable;
extern int block_dump;
extern int laptop_mode;
extern unsigned long determine_dirtyable_memory(void);
extern int dirty_ratio_handler(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos);

View file

@ -11,6 +11,18 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o
CFLAGS_REMOVE_sched.o = -pg -mno-spe
ifdef CONFIG_FTRACE
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
CFLAGS_REMOVE_lockdep_proc.o = -pg
CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
endif
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
@ -69,6 +81,8 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_FTRACE) += trace/
obj-$(CONFIG_TRACING) += trace/
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is

View file

@ -909,7 +909,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
rt_mutex_init_task(p);
#ifdef CONFIG_TRACE_IRQFLAGS
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP)
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif

View file

@ -39,6 +39,7 @@
#include <linux/irqflags.h>
#include <linux/utsname.h>
#include <linux/hash.h>
#include <linux/ftrace.h>
#include <asm/sections.h>
@ -81,6 +82,8 @@ static int graph_lock(void)
__raw_spin_unlock(&lockdep_lock);
return 0;
}
/* prevent any recursions within lockdep from causing deadlocks */
current->lockdep_recursion++;
return 1;
}
@ -89,6 +92,7 @@ static inline int graph_unlock(void)
if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
return DEBUG_LOCKS_WARN_ON(1);
current->lockdep_recursion--;
__raw_spin_unlock(&lockdep_lock);
return 0;
}
@ -982,7 +986,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
return 1;
}
#ifdef CONFIG_TRACE_IRQFLAGS
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* Forwards and backwards subgraph searching, for the purposes of
* proving that two subgraphs can be connected by a new dependency
@ -1680,7 +1684,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit);
#ifdef CONFIG_TRACE_IRQFLAGS
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* print irq inversion bug:
@ -2013,11 +2017,13 @@ void early_boot_irqs_on(void)
/*
* Hardirqs will be enabled:
*/
void trace_hardirqs_on(void)
void trace_hardirqs_on_caller(unsigned long a0)
{
struct task_struct *curr = current;
unsigned long ip;
time_hardirqs_on(CALLER_ADDR0, a0);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@ -2055,16 +2061,23 @@ void trace_hardirqs_on(void)
curr->hardirq_enable_event = ++curr->irq_events;
debug_atomic_inc(&hardirqs_on_events);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
void trace_hardirqs_on(void)
{
trace_hardirqs_on_caller(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
/*
* Hardirqs were disabled:
*/
void trace_hardirqs_off(void)
void trace_hardirqs_off_caller(unsigned long a0)
{
struct task_struct *curr = current;
time_hardirqs_off(CALLER_ADDR0, a0);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@ -2082,7 +2095,12 @@ void trace_hardirqs_off(void)
} else
debug_atomic_inc(&redundant_hardirqs_off);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
void trace_hardirqs_off(void)
{
trace_hardirqs_off_caller(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off);
/*
@ -2246,7 +2264,7 @@ static inline int separate_irq_context(struct task_struct *curr,
* Mark a lock with a usage bit, and validate the state transition:
*/
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit)
enum lock_usage_bit new_bit)
{
unsigned int new_mask = 1 << new_bit, ret = 1;
@ -2686,7 +2704,7 @@ static void check_flags(unsigned long flags)
* and also avoid lockdep recursion:
*/
void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
int trylock, int read, int check, unsigned long ip)
int trylock, int read, int check, unsigned long ip)
{
unsigned long flags;
@ -2708,7 +2726,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
EXPORT_SYMBOL_GPL(lock_acquire);
void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
void lock_release(struct lockdep_map *lock, int nested,
unsigned long ip)
{
unsigned long flags;

View file

@ -55,8 +55,8 @@ static DEFINE_MUTEX(markers_mutex);
struct marker_entry {
struct hlist_node hlist;
char *format;
void (*call)(const struct marker *mdata, /* Probe wrapper */
void *call_private, const char *fmt, ...);
/* Probe wrapper */
void (*call)(const struct marker *mdata, void *call_private, ...);
struct marker_probe_closure single;
struct marker_probe_closure *multi;
int refcount; /* Number of times armed. 0 if disarmed. */
@ -91,15 +91,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
* marker_probe_cb Callback that prepares the variable argument list for probes.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
* @fmt: format string
* @...: Variable argument list.
*
* Since we do not use "typical" pointer based RCU in the 1 argument case, we
* need to put a full smp_rmb() in this branch. This is why we do not use
* rcu_dereference() for the pointer read.
*/
void marker_probe_cb(const struct marker *mdata, void *call_private,
const char *fmt, ...)
void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
{
va_list args;
char ptype;
@ -120,8 +118,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
/* Must read the ptr before private data. They are not data
* dependant, so we put an explicit smp_rmb() here. */
smp_rmb();
va_start(args, fmt);
func(mdata->single.probe_private, call_private, fmt, &args);
va_start(args, call_private);
func(mdata->single.probe_private, call_private, mdata->format,
&args);
va_end(args);
} else {
struct marker_probe_closure *multi;
@ -136,9 +135,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
smp_read_barrier_depends();
multi = mdata->multi;
for (i = 0; multi[i].func; i++) {
va_start(args, fmt);
multi[i].func(multi[i].probe_private, call_private, fmt,
&args);
va_start(args, call_private);
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
va_end(args);
}
}
@ -150,13 +149,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
* marker_probe_cb Callback that does not prepare the variable argument list.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
* @fmt: format string
* @...: Variable argument list.
*
* Should be connected to markers "MARK_NOARGS".
*/
void marker_probe_cb_noarg(const struct marker *mdata,
void *call_private, const char *fmt, ...)
void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
{
va_list args; /* not initialized */
char ptype;
@ -172,7 +169,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
/* Must read the ptr before private data. They are not data
* dependant, so we put an explicit smp_rmb() here. */
smp_rmb();
func(mdata->single.probe_private, call_private, fmt, &args);
func(mdata->single.probe_private, call_private, mdata->format,
&args);
} else {
struct marker_probe_closure *multi;
int i;
@ -186,8 +184,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
smp_read_barrier_depends();
multi = mdata->multi;
for (i = 0; multi[i].func; i++)
multi[i].func(multi[i].probe_private, call_private, fmt,
&args);
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
}
preempt_enable();
}

View file

@ -1041,7 +1041,9 @@ void release_console_sem(void)
_log_end = log_end;
con_start = log_end; /* Flush */
spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(_con_start, _log_end);
start_critical_timings();
local_irq_restore(flags);
}
console_locked = 0;

View file

@ -70,6 +70,7 @@
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@ -607,6 +608,24 @@ static inline void update_rq_clock(struct rq *rq)
# define const_debug static const
#endif
/**
* runqueue_is_locked
*
* Returns true if the current cpu runqueue is locked.
* This interface allows printk to be called with the runqueue lock
* held and know whether or not it is OK to wake up the klogd.
*/
int runqueue_is_locked(void)
{
int cpu = get_cpu();
struct rq *rq = cpu_rq(cpu);
int ret;
ret = spin_is_locked(&rq->lock);
put_cpu();
return ret;
}
/*
* Debugging: various feature bits
*/
@ -831,7 +850,7 @@ static unsigned long long __cpu_clock(int cpu)
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
* clock constructed from sched_clock():
*/
unsigned long long cpu_clock(int cpu)
unsigned long long notrace cpu_clock(int cpu)
{
unsigned long long prev_cpu_time, time, delta_time;
unsigned long flags;
@ -2149,6 +2168,9 @@ out_activate:
success = 1;
out_running:
trace_mark(kernel_sched_wakeup,
"pid %d state %ld ## rq %p task %p rq->curr %p",
p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
p->state = TASK_RUNNING;
@ -2279,6 +2301,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
p->sched_class->task_new(rq, p);
inc_nr_running(p, rq);
}
trace_mark(kernel_sched_wakeup_new,
"pid %d state %ld ## rq %p task %p rq->curr %p",
p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
@ -2451,6 +2476,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
trace_mark(kernel_sched_schedule,
"prev_pid %d next_pid %d prev_state %ld "
"## rq %p prev %p next %p",
prev->pid, next->pid, prev->state,
rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
@ -4021,26 +4051,44 @@ void scheduler_tick(void)
#endif
}
#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
defined(CONFIG_PREEMPT_TRACER))
static inline unsigned long get_parent_ip(unsigned long addr)
{
if (in_lock_functions(addr)) {
addr = CALLER_ADDR2;
if (in_lock_functions(addr))
addr = CALLER_ADDR3;
}
return addr;
}
void __kprobes add_preempt_count(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
/*
* Underflow?
*/
if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
return;
#endif
preempt_count() += val;
#ifdef CONFIG_DEBUG_PREEMPT
/*
* Spinlock count overflowing soon?
*/
DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
PREEMPT_MASK - 10);
#endif
if (preempt_count() == val)
trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
EXPORT_SYMBOL(add_preempt_count);
void __kprobes sub_preempt_count(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
/*
* Underflow?
*/
@ -4052,7 +4100,10 @@ void __kprobes sub_preempt_count(int val)
if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
!(preempt_count() & PREEMPT_MASK)))
return;
#endif
if (preempt_count() == val)
trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
preempt_count() -= val;
}
EXPORT_SYMBOL(sub_preempt_count);
@ -5384,7 +5435,7 @@ out_unlock:
return retval;
}
static const char stat_nam[] = "RSDTtZX";
static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
void sched_show_task(struct task_struct *p)
{

View file

@ -31,6 +31,7 @@
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/ftrace.h>
static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);

View file

@ -436,7 +436,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
}
EXPORT_SYMBOL(_spin_trylock_bh);
int in_lock_functions(unsigned long addr)
notrace int in_lock_functions(unsigned long addr)
{
/* Linker adds these: start and end of __lockfunc functions */
extern char __lock_text_start[], __lock_text_end[];

View file

@ -46,6 +46,7 @@
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@ -455,6 +456,16 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#ifdef CONFIG_FTRACE
{
.ctl_name = CTL_UNNUMBERED,
.procname = "ftrace_enabled",
.data = &ftrace_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &ftrace_enable_sysctl,
},
#endif
#ifdef CONFIG_KMOD
{
.ctl_name = KERN_MODPROBE,

127
kernel/trace/Kconfig Normal file
View file

@ -0,0 +1,127 @@
#
# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
#
config HAVE_FTRACE
bool
config HAVE_DYNAMIC_FTRACE
bool
config TRACER_MAX_TRACE
bool
config TRACING
bool
select DEBUG_FS
select STACKTRACE
config FTRACE
bool "Kernel Function Tracer"
depends on HAVE_FTRACE
select FRAME_POINTER
select TRACING
select CONTEXT_SWITCH_TRACER
help
Enable the kernel to trace every kernel function. This is done
by using a compiler feature to insert a small, 5-byte No-Operation
instruction to the beginning of every kernel function, which NOP
sequence is then dynamically patched into a tracer call when
tracing is enabled by the administrator. If it's runtime disabled
(the bootup default), then the overhead of the instructions is very
small and not measurable even in micro-benchmarks.
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
depends on TRACE_IRQFLAGS_SUPPORT
depends on GENERIC_TIME
depends on HAVE_FTRACE
select TRACE_IRQFLAGS
select TRACING
select TRACER_MAX_TRACE
help
This option measures the time spent in irqs-off critical
sections, with microsecond accuracy.
The default measurement method is a maximum search, which is
disabled by default and can be runtime (re-)started
via:
echo 0 > /debugfs/tracing/tracing_max_latency
(Note that kernel size and overhead increases with this option
enabled. This option and the preempt-off timing option can be
used together or separately.)
config PREEMPT_TRACER
bool "Preemption-off Latency Tracer"
default n
depends on GENERIC_TIME
depends on PREEMPT
depends on HAVE_FTRACE
select TRACING
select TRACER_MAX_TRACE
help
This option measures the time spent in preemption off critical
sections, with microsecond accuracy.
The default measurement method is a maximum search, which is
disabled by default and can be runtime (re-)started
via:
echo 0 > /debugfs/tracing/tracing_max_latency
(Note that kernel size and overhead increases with this option
enabled. This option and the irqs-off timing option can be
used together or separately.)
config SCHED_TRACER
bool "Scheduling Latency Tracer"
depends on HAVE_FTRACE
select TRACING
select CONTEXT_SWITCH_TRACER
select TRACER_MAX_TRACE
help
This tracer tracks the latency of the highest priority task
to be scheduled in, starting from the point it has woken up.
config CONTEXT_SWITCH_TRACER
bool "Trace process context switches"
depends on HAVE_FTRACE
select TRACING
select MARKERS
help
This tracer gets called from the context switch and records
all switching of tasks.
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FTRACE
depends on HAVE_DYNAMIC_FTRACE
default y
help
This option will modify all the calls to ftrace dynamically
(will patch them out of the binary image and replaces them
with a No-Op instruction) as they are called. A table is
created to dynamically enable them again.
This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
has native performance as long as no tracing is active.
The changes to the code are done by a kernel thread that
wakes up once a second and checks to see if any ftrace calls
were made. If so, it runs stop_machine (stops all CPUS)
and modifies the code to jump over the call to ftrace.
config FTRACE_SELFTEST
bool
config FTRACE_STARTUP_TEST
bool "Perform a startup test on ftrace"
depends on TRACING
select FTRACE_SELFTEST
help
This option performs a series of startup tests on ftrace. On bootup
a series of tests are made to verify that the tracer is
functioning properly. It will do tests on all the configured
tracers of ftrace.

22
kernel/trace/Makefile Normal file
View file

@ -0,0 +1,22 @@
# Do not instrument the tracer itself:
ifdef CONFIG_FTRACE
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
# selftest needs instrumentation
CFLAGS_trace_selftest_dynamic.o = -pg
obj-y += trace_selftest_dynamic.o
endif
obj-$(CONFIG_FTRACE) += libftrace.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_FTRACE) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
libftrace-y := ftrace.o

1710
kernel/trace/ftrace.c Normal file

File diff suppressed because it is too large Load diff

3100
kernel/trace/trace.c Normal file

File diff suppressed because it is too large Load diff

313
kernel/trace/trace.h Normal file
View file

@ -0,0 +1,313 @@
#ifndef _LINUX_KERNEL_TRACE_H
#define _LINUX_KERNEL_TRACE_H
#include <linux/fs.h>
#include <asm/atomic.h>
#include <linux/sched.h>
#include <linux/clocksource.h>
enum trace_type {
__TRACE_FIRST_TYPE = 0,
TRACE_FN,
TRACE_CTX,
TRACE_WAKE,
TRACE_STACK,
TRACE_SPECIAL,
__TRACE_LAST_TYPE
};
/*
* Function trace entry - function address and parent function addres:
*/
struct ftrace_entry {
unsigned long ip;
unsigned long parent_ip;
};
/*
* Context switch trace entry - which task (and prio) we switched from/to:
*/
struct ctx_switch_entry {
unsigned int prev_pid;
unsigned char prev_prio;
unsigned char prev_state;
unsigned int next_pid;
unsigned char next_prio;
unsigned char next_state;
};
/*
* Special (free-form) trace entry:
*/
struct special_entry {
unsigned long arg1;
unsigned long arg2;
unsigned long arg3;
};
/*
* Stack-trace entry:
*/
#define FTRACE_STACK_ENTRIES 8
struct stack_entry {
unsigned long caller[FTRACE_STACK_ENTRIES];
};
/*
* The trace entry - the most basic unit of tracing. This is what
* is printed in the end as a single line in the trace output, such as:
*
* bash-15816 [01] 235.197585: idle_cpu <- irq_enter
*/
struct trace_entry {
char type;
char cpu;
char flags;
char preempt_count;
int pid;
cycle_t t;
union {
struct ftrace_entry fn;
struct ctx_switch_entry ctx;
struct special_entry special;
struct stack_entry stack;
};
};
#define TRACE_ENTRY_SIZE sizeof(struct trace_entry)
/*
* The CPU trace array - it consists of thousands of trace entries
* plus some other descriptor data: (for example which task started
* the trace, etc.)
*/
struct trace_array_cpu {
struct list_head trace_pages;
atomic_t disabled;
raw_spinlock_t lock;
struct lock_class_key lock_key;
/* these fields get copied into max-trace: */
unsigned trace_head_idx;
unsigned trace_tail_idx;
void *trace_head; /* producer */
void *trace_tail; /* consumer */
unsigned long trace_idx;
unsigned long overrun;
unsigned long saved_latency;
unsigned long critical_start;
unsigned long critical_end;
unsigned long critical_sequence;
unsigned long nice;
unsigned long policy;
unsigned long rt_priority;
cycle_t preempt_timestamp;
pid_t pid;
uid_t uid;
char comm[TASK_COMM_LEN];
};
struct trace_iterator;
/*
* The trace array - an array of per-CPU trace arrays. This is the
* highest level data structure that individual tracers deal with.
* They have on/off state as well:
*/
struct trace_array {
unsigned long entries;
long ctrl;
int cpu;
cycle_t time_start;
struct task_struct *waiter;
struct trace_array_cpu *data[NR_CPUS];
};
/*
* A specific tracer, represented by methods that operate on a trace array:
*/
struct tracer {
const char *name;
void (*init)(struct trace_array *tr);
void (*reset)(struct trace_array *tr);
void (*open)(struct trace_iterator *iter);
void (*pipe_open)(struct trace_iterator *iter);
void (*close)(struct trace_iterator *iter);
void (*start)(struct trace_iterator *iter);
void (*stop)(struct trace_iterator *iter);
ssize_t (*read)(struct trace_iterator *iter,
struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos);
void (*ctrl_update)(struct trace_array *tr);
#ifdef CONFIG_FTRACE_STARTUP_TEST
int (*selftest)(struct tracer *trace,
struct trace_array *tr);
#endif
int (*print_line)(struct trace_iterator *iter);
struct tracer *next;
int print_max;
};
struct trace_seq {
unsigned char buffer[PAGE_SIZE];
unsigned int len;
unsigned int readpos;
};
/*
* Trace iterator - used by printout routines who present trace
* results to users and which routines might sleep, etc:
*/
struct trace_iterator {
struct trace_array *tr;
struct tracer *trace;
void *private;
long last_overrun[NR_CPUS];
long overrun[NR_CPUS];
/* The below is zeroed out in pipe_read */
struct trace_seq seq;
struct trace_entry *ent;
int cpu;
struct trace_entry *prev_ent;
int prev_cpu;
unsigned long iter_flags;
loff_t pos;
unsigned long next_idx[NR_CPUS];
struct list_head *next_page[NR_CPUS];
unsigned next_page_idx[NR_CPUS];
long idx;
};
void tracing_reset(struct trace_array_cpu *data);
int tracing_open_generic(struct inode *inode, struct file *filp);
struct dentry *tracing_init_dentry(void);
void ftrace(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags);
void tracing_sched_switch_trace(struct trace_array *tr,
struct trace_array_cpu *data,
struct task_struct *prev,
struct task_struct *next,
unsigned long flags);
void tracing_record_cmdline(struct task_struct *tsk);
void tracing_sched_wakeup_trace(struct trace_array *tr,
struct trace_array_cpu *data,
struct task_struct *wakee,
struct task_struct *cur,
unsigned long flags);
void trace_special(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long arg1,
unsigned long arg2,
unsigned long arg3);
void trace_function(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags);
void tracing_start_function_trace(void);
void tracing_stop_function_trace(void);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
extern unsigned long tracing_max_latency;
extern unsigned long tracing_thresh;
void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
void update_max_tr_single(struct trace_array *tr,
struct task_struct *tsk, int cpu);
extern cycle_t ftrace_now(int cpu);
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
typedef void
(*tracer_switch_func_t)(void *private,
void *__rq,
struct task_struct *prev,
struct task_struct *next);
struct tracer_switch_ops {
tracer_switch_func_t func;
void *private;
struct tracer_switch_ops *next;
};
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
extern int DYN_FTRACE_TEST_NAME(void);
#endif
#ifdef CONFIG_FTRACE_STARTUP_TEST
#ifdef CONFIG_FTRACE
extern int trace_selftest_startup_function(struct tracer *trace,
struct trace_array *tr);
#endif
#ifdef CONFIG_IRQSOFF_TRACER
extern int trace_selftest_startup_irqsoff(struct tracer *trace,
struct trace_array *tr);
#endif
#ifdef CONFIG_PREEMPT_TRACER
extern int trace_selftest_startup_preemptoff(struct tracer *trace,
struct trace_array *tr);
#endif
#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
struct trace_array *tr);
#endif
#ifdef CONFIG_SCHED_TRACER
extern int trace_selftest_startup_wakeup(struct tracer *trace,
struct trace_array *tr);
#endif
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
extern int trace_selftest_startup_sched_switch(struct tracer *trace,
struct trace_array *tr);
#endif
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
size_t cnt);
extern long ns2usecs(cycle_t nsec);
extern unsigned long trace_flags;
/*
* trace_iterator_flags is an enumeration that defines bit
* positions into trace_flags that controls the output.
*
* NOTE: These bits must match the trace_options array in
* trace.c.
*/
enum trace_iterator_flags {
TRACE_ITER_PRINT_PARENT = 0x01,
TRACE_ITER_SYM_OFFSET = 0x02,
TRACE_ITER_SYM_ADDR = 0x04,
TRACE_ITER_VERBOSE = 0x08,
TRACE_ITER_RAW = 0x10,
TRACE_ITER_HEX = 0x20,
TRACE_ITER_BIN = 0x40,
TRACE_ITER_BLOCK = 0x80,
TRACE_ITER_STACKTRACE = 0x100,
TRACE_ITER_SCHED_TREE = 0x200,
};
#endif /* _LINUX_KERNEL_TRACE_H */

View file

@ -0,0 +1,78 @@
/*
* ring buffer based function tracer
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* Based on code from the latency_tracer, that is:
*
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 William Lee Irwin III
*/
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/fs.h>
#include "trace.h"
static void function_reset(struct trace_array *tr)
{
int cpu;
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
tracing_reset(tr->data[cpu]);
}
static void start_function_trace(struct trace_array *tr)
{
function_reset(tr);
tracing_start_cmdline_record();
tracing_start_function_trace();
}
static void stop_function_trace(struct trace_array *tr)
{
tracing_stop_function_trace();
tracing_stop_cmdline_record();
}
static void function_trace_init(struct trace_array *tr)
{
if (tr->ctrl)
start_function_trace(tr);
}
static void function_trace_reset(struct trace_array *tr)
{
if (tr->ctrl)
stop_function_trace(tr);
}
static void function_trace_ctrl_update(struct trace_array *tr)
{
if (tr->ctrl)
start_function_trace(tr);
else
stop_function_trace(tr);
}
static struct tracer function_trace __read_mostly =
{
.name = "ftrace",
.init = function_trace_init,
.reset = function_trace_reset,
.ctrl_update = function_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function,
#endif
};
static __init int init_function_trace(void)
{
return register_tracer(&function_trace);
}
device_initcall(init_function_trace);

View file

@ -0,0 +1,486 @@
/*
* trace irqs off criticall timings
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* From code in the latency_tracer, that is:
*
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 William Lee Irwin III
*/
#include <linux/kallsyms.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/fs.h>
#include "trace.h"
static struct trace_array *irqsoff_trace __read_mostly;
static int tracer_enabled __read_mostly;
static DEFINE_PER_CPU(int, tracing_cpu);
static DEFINE_SPINLOCK(max_trace_lock);
enum {
TRACER_IRQS_OFF = (1 << 1),
TRACER_PREEMPT_OFF = (1 << 2),
};
static int trace_type __read_mostly;
#ifdef CONFIG_PREEMPT_TRACER
static inline int
preempt_trace(void)
{
return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
}
#else
# define preempt_trace() (0)
#endif
#ifdef CONFIG_IRQSOFF_TRACER
static inline int
irq_trace(void)
{
return ((trace_type & TRACER_IRQS_OFF) &&
irqs_disabled());
}
#else
# define irq_trace() (0)
#endif
/*
* Sequence count - we record it when starting a measurement and
* skip the latency if the sequence has changed - some other section
* did a maximum and could disturb our measurement with serial console
* printouts, etc. Truly coinciding maximum latencies should be rare
* and what happens together happens separately as well, so this doesnt
* decrease the validity of the maximum found:
*/
static __cacheline_aligned_in_smp unsigned long max_sequence;
#ifdef CONFIG_FTRACE
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
static void
irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
/*
* Does not matter if we preempt. We test the flags
* afterward, to see if irqs are disabled or not.
* If we preempt and get a false positive, the flags
* test will fail.
*/
cpu = raw_smp_processor_id();
if (likely(!per_cpu(tracing_cpu, cpu)))
return;
local_save_flags(flags);
/* slight chance to get a false positive on tracing_cpu */
if (!irqs_disabled_flags(flags))
return;
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
trace_function(tr, data, ip, parent_ip, flags);
atomic_dec(&data->disabled);
}
static struct ftrace_ops trace_ops __read_mostly =
{
.func = irqsoff_tracer_call,
};
#endif /* CONFIG_FTRACE */
/*
* Should this new latency be reported/recorded?
*/
static int report_latency(cycle_t delta)
{
if (tracing_thresh) {
if (delta < tracing_thresh)
return 0;
} else {
if (delta <= tracing_max_latency)
return 0;
}
return 1;
}
static void
check_critical_timing(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long parent_ip,
int cpu)
{
unsigned long latency, t0, t1;
cycle_t T0, T1, delta;
unsigned long flags;
/*
* usecs conversion is slow so we try to delay the conversion
* as long as possible:
*/
T0 = data->preempt_timestamp;
T1 = ftrace_now(cpu);
delta = T1-T0;
local_save_flags(flags);
if (!report_latency(delta))
goto out;
spin_lock_irqsave(&max_trace_lock, flags);
/* check if we are still the max latency */
if (!report_latency(delta))
goto out_unlock;
trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
latency = nsecs_to_usecs(delta);
if (data->critical_sequence != max_sequence)
goto out_unlock;
tracing_max_latency = delta;
t0 = nsecs_to_usecs(T0);
t1 = nsecs_to_usecs(T1);
data->critical_end = parent_ip;
update_max_tr_single(tr, current, cpu);
max_sequence++;
out_unlock:
spin_unlock_irqrestore(&max_trace_lock, flags);
out:
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
tracing_reset(data);
trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
}
static inline void
start_critical_timing(unsigned long ip, unsigned long parent_ip)
{
int cpu;
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
if (likely(!tracer_enabled))
return;
cpu = raw_smp_processor_id();
if (per_cpu(tracing_cpu, cpu))
return;
data = tr->data[cpu];
if (unlikely(!data) || atomic_read(&data->disabled))
return;
atomic_inc(&data->disabled);
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
data->critical_start = parent_ip ? : ip;
tracing_reset(data);
local_save_flags(flags);
trace_function(tr, data, ip, parent_ip, flags);
per_cpu(tracing_cpu, cpu) = 1;
atomic_dec(&data->disabled);
}
static inline void
stop_critical_timing(unsigned long ip, unsigned long parent_ip)
{
int cpu;
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
cpu = raw_smp_processor_id();
/* Always clear the tracing cpu on stopping the trace */
if (unlikely(per_cpu(tracing_cpu, cpu)))
per_cpu(tracing_cpu, cpu) = 0;
else
return;
if (!tracer_enabled)
return;
data = tr->data[cpu];
if (unlikely(!data) || unlikely(!head_page(data)) ||
!data->critical_start || atomic_read(&data->disabled))
return;
atomic_inc(&data->disabled);
local_save_flags(flags);
trace_function(tr, data, ip, parent_ip, flags);
check_critical_timing(tr, data, parent_ip ? : ip, cpu);
data->critical_start = 0;
atomic_dec(&data->disabled);
}
/* start and stop critical timings used to for stoppage (in idle) */
void start_critical_timings(void)
{
if (preempt_trace() || irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
void stop_critical_timings(void)
{
if (preempt_trace() || irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
#ifdef CONFIG_IRQSOFF_TRACER
#ifdef CONFIG_PROVE_LOCKING
void time_hardirqs_on(unsigned long a0, unsigned long a1)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(a0, a1);
}
void time_hardirqs_off(unsigned long a0, unsigned long a1)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(a0, a1);
}
#else /* !CONFIG_PROVE_LOCKING */
/*
* Stubs:
*/
void early_boot_irqs_off(void)
{
}
void early_boot_irqs_on(void)
{
}
void trace_softirqs_on(unsigned long ip)
{
}
void trace_softirqs_off(unsigned long ip)
{
}
inline void print_irqtrace_events(struct task_struct *curr)
{
}
/*
* We are only interested in hardirq on/off events:
*/
void trace_hardirqs_on(void)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
EXPORT_SYMBOL(trace_hardirqs_on);
void trace_hardirqs_off(void)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
EXPORT_SYMBOL(trace_hardirqs_off);
void trace_hardirqs_on_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
void trace_hardirqs_off_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
#endif /* CONFIG_PROVE_LOCKING */
#endif /* CONFIG_IRQSOFF_TRACER */
#ifdef CONFIG_PREEMPT_TRACER
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
stop_critical_timing(a0, a1);
}
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
start_critical_timing(a0, a1);
}
#endif /* CONFIG_PREEMPT_TRACER */
static void start_irqsoff_tracer(struct trace_array *tr)
{
register_ftrace_function(&trace_ops);
tracer_enabled = 1;
}
static void stop_irqsoff_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
}
static void __irqsoff_tracer_init(struct trace_array *tr)
{
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
if (tr->ctrl)
start_irqsoff_tracer(tr);
}
static void irqsoff_tracer_reset(struct trace_array *tr)
{
if (tr->ctrl)
stop_irqsoff_tracer(tr);
}
static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
{
if (tr->ctrl)
start_irqsoff_tracer(tr);
else
stop_irqsoff_tracer(tr);
}
static void irqsoff_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
if (iter->tr->ctrl)
stop_irqsoff_tracer(iter->tr);
}
static void irqsoff_tracer_close(struct trace_iterator *iter)
{
if (iter->tr->ctrl)
start_irqsoff_tracer(iter->tr);
}
#ifdef CONFIG_IRQSOFF_TRACER
static void irqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF;
__irqsoff_tracer_init(tr);
}
static struct tracer irqsoff_tracer __read_mostly =
{
.name = "irqsoff",
.init = irqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
.ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_irqsoff,
#endif
};
# define register_irqsoff(trace) register_tracer(&trace)
#else
# define register_irqsoff(trace) do { } while (0)
#endif
#ifdef CONFIG_PREEMPT_TRACER
static void preemptoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_PREEMPT_OFF;
__irqsoff_tracer_init(tr);
}
static struct tracer preemptoff_tracer __read_mostly =
{
.name = "preemptoff",
.init = preemptoff_tracer_init,
.reset = irqsoff_tracer_reset,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
.ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptoff,
#endif
};
# define register_preemptoff(trace) register_tracer(&trace)
#else
# define register_preemptoff(trace) do { } while (0)
#endif
#if defined(CONFIG_IRQSOFF_TRACER) && \
defined(CONFIG_PREEMPT_TRACER)
static void preemptirqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
__irqsoff_tracer_init(tr);
}
static struct tracer preemptirqsoff_tracer __read_mostly =
{
.name = "preemptirqsoff",
.init = preemptirqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
.ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptirqsoff,
#endif
};
# define register_preemptirqsoff(trace) register_tracer(&trace)
#else
# define register_preemptirqsoff(trace) do { } while (0)
#endif
__init static int init_irqsoff_tracer(void)
{
register_irqsoff(irqsoff_tracer);
register_preemptoff(preemptoff_tracer);
register_preemptirqsoff(preemptirqsoff_tracer);
return 0;
}
device_initcall(init_irqsoff_tracer);

View file

@ -0,0 +1,286 @@
/*
* trace context switch
*
* Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
*
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/marker.h>
#include <linux/ftrace.h>
#include "trace.h"
static struct trace_array *ctx_trace;
static int __read_mostly tracer_enabled;
static atomic_t sched_ref;
static void
sched_switch_func(void *private, void *__rq, struct task_struct *prev,
struct task_struct *next)
{
struct trace_array **ptr = private;
struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
tracing_record_cmdline(prev);
tracing_record_cmdline(next);
if (!tracer_enabled)
return;
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
tracing_sched_switch_trace(tr, data, prev, next, flags);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
static notrace void
sched_switch_callback(void *probe_data, void *call_data,
const char *format, va_list *args)
{
struct task_struct *prev;
struct task_struct *next;
struct rq *__rq;
if (!atomic_read(&sched_ref))
return;
/* skip prev_pid %d next_pid %d prev_state %ld */
(void)va_arg(*args, int);
(void)va_arg(*args, int);
(void)va_arg(*args, long);
__rq = va_arg(*args, typeof(__rq));
prev = va_arg(*args, typeof(prev));
next = va_arg(*args, typeof(next));
/*
* If tracer_switch_func only points to the local
* switch func, it still needs the ptr passed to it.
*/
sched_switch_func(probe_data, __rq, prev, next);
}
static void
wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
task_struct *curr)
{
struct trace_array **ptr = private;
struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
if (!tracer_enabled)
return;
tracing_record_cmdline(curr);
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
static notrace void
wake_up_callback(void *probe_data, void *call_data,
const char *format, va_list *args)
{
struct task_struct *curr;
struct task_struct *task;
struct rq *__rq;
if (likely(!tracer_enabled))
return;
/* Skip pid %d state %ld */
(void)va_arg(*args, int);
(void)va_arg(*args, long);
/* now get the meat: "rq %p task %p rq->curr %p" */
__rq = va_arg(*args, typeof(__rq));
task = va_arg(*args, typeof(task));
curr = va_arg(*args, typeof(curr));
tracing_record_cmdline(task);
tracing_record_cmdline(curr);
wakeup_func(probe_data, __rq, task, curr);
}
static void sched_switch_reset(struct trace_array *tr)
{
int cpu;
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
tracing_reset(tr->data[cpu]);
}
static int tracing_sched_register(void)
{
int ret;
ret = marker_probe_register("kernel_sched_wakeup",
"pid %d state %ld ## rq %p task %p rq->curr %p",
wake_up_callback,
&ctx_trace);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup\n");
return ret;
}
ret = marker_probe_register("kernel_sched_wakeup_new",
"pid %d state %ld ## rq %p task %p rq->curr %p",
wake_up_callback,
&ctx_trace);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
ret = marker_probe_register("kernel_sched_schedule",
"prev_pid %d next_pid %d prev_state %ld "
"## rq %p prev %p next %p",
sched_switch_callback,
&ctx_trace);
if (ret) {
pr_info("sched trace: Couldn't add marker"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
return ret;
fail_deprobe_wake_new:
marker_probe_unregister("kernel_sched_wakeup_new",
wake_up_callback,
&ctx_trace);
fail_deprobe:
marker_probe_unregister("kernel_sched_wakeup",
wake_up_callback,
&ctx_trace);
return ret;
}
static void tracing_sched_unregister(void)
{
marker_probe_unregister("kernel_sched_schedule",
sched_switch_callback,
&ctx_trace);
marker_probe_unregister("kernel_sched_wakeup_new",
wake_up_callback,
&ctx_trace);
marker_probe_unregister("kernel_sched_wakeup",
wake_up_callback,
&ctx_trace);
}
static void tracing_start_sched_switch(void)
{
long ref;
ref = atomic_inc_return(&sched_ref);
if (ref == 1)
tracing_sched_register();
}
static void tracing_stop_sched_switch(void)
{
long ref;
ref = atomic_dec_and_test(&sched_ref);
if (ref)
tracing_sched_unregister();
}
void tracing_start_cmdline_record(void)
{
tracing_start_sched_switch();
}
void tracing_stop_cmdline_record(void)
{
tracing_stop_sched_switch();
}
static void start_sched_trace(struct trace_array *tr)
{
sched_switch_reset(tr);
tracer_enabled = 1;
tracing_start_cmdline_record();
}
static void stop_sched_trace(struct trace_array *tr)
{
tracing_stop_cmdline_record();
tracer_enabled = 0;
}
static void sched_switch_trace_init(struct trace_array *tr)
{
ctx_trace = tr;
if (tr->ctrl)
start_sched_trace(tr);
}
static void sched_switch_trace_reset(struct trace_array *tr)
{
if (tr->ctrl)
stop_sched_trace(tr);
}
static void sched_switch_trace_ctrl_update(struct trace_array *tr)
{
/* When starting a new trace, reset the buffers */
if (tr->ctrl)
start_sched_trace(tr);
else
stop_sched_trace(tr);
}
static struct tracer sched_switch_trace __read_mostly =
{
.name = "sched_switch",
.init = sched_switch_trace_init,
.reset = sched_switch_trace_reset,
.ctrl_update = sched_switch_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sched_switch,
#endif
};
__init static int init_sched_switch_trace(void)
{
int ret = 0;
if (atomic_read(&sched_ref))
ret = tracing_sched_register();
if (ret) {
pr_info("error registering scheduler trace\n");
return ret;
}
return register_tracer(&sched_switch_trace);
}
device_initcall(init_sched_switch_trace);

View file

@ -0,0 +1,447 @@
/*
* trace task wakeup timings
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* Based on code from the latency_tracer, that is:
*
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 William Lee Irwin III
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/marker.h>
#include "trace.h"
static struct trace_array *wakeup_trace;
static int __read_mostly tracer_enabled;
static struct task_struct *wakeup_task;
static int wakeup_cpu;
static unsigned wakeup_prio = -1;
static DEFINE_SPINLOCK(wakeup_lock);
static void __wakeup_reset(struct trace_array *tr);
#ifdef CONFIG_FTRACE
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int resched;
int cpu;
if (likely(!wakeup_task))
return;
resched = need_resched();
preempt_disable_notrace();
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (unlikely(disabled != 1))
goto out;
spin_lock_irqsave(&wakeup_lock, flags);
if (unlikely(!wakeup_task))
goto unlock;
/*
* The task can't disappear because it needs to
* wake up first, and we have the wakeup_lock.
*/
if (task_cpu(wakeup_task) != cpu)
goto unlock;
trace_function(tr, data, ip, parent_ip, flags);
unlock:
spin_unlock_irqrestore(&wakeup_lock, flags);
out:
atomic_dec(&data->disabled);
/*
* To prevent recursion from the scheduler, if the
* resched flag was set before we entered, then
* don't reschedule.
*/
if (resched)
preempt_enable_no_resched_notrace();
else
preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __read_mostly =
{
.func = wakeup_tracer_call,
};
#endif /* CONFIG_FTRACE */
/*
* Should this new latency be reported/recorded?
*/
static int report_latency(cycle_t delta)
{
if (tracing_thresh) {
if (delta < tracing_thresh)
return 0;
} else {
if (delta <= tracing_max_latency)
return 0;
}
return 1;
}
static void notrace
wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
struct task_struct *next)
{
unsigned long latency = 0, t0 = 0, t1 = 0;
struct trace_array **ptr = private;
struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
cycle_t T0, T1, delta;
unsigned long flags;
long disabled;
int cpu;
if (unlikely(!tracer_enabled))
return;
/*
* When we start a new trace, we set wakeup_task to NULL
* and then set tracer_enabled = 1. We want to make sure
* that another CPU does not see the tracer_enabled = 1
* and the wakeup_task with an older task, that might
* actually be the same as next.
*/
smp_rmb();
if (next != wakeup_task)
return;
/* The task we are waiting for is waking up */
data = tr->data[wakeup_cpu];
/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&tr->data[cpu]->disabled);
if (likely(disabled != 1))
goto out;
spin_lock_irqsave(&wakeup_lock, flags);
/* We could race with grabbing wakeup_lock */
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
/*
* usecs conversion is slow so we try to delay the conversion
* as long as possible:
*/
T0 = data->preempt_timestamp;
T1 = ftrace_now(cpu);
delta = T1-T0;
if (!report_latency(delta))
goto out_unlock;
latency = nsecs_to_usecs(delta);
tracing_max_latency = delta;
t0 = nsecs_to_usecs(T0);
t1 = nsecs_to_usecs(T1);
update_max_tr(tr, wakeup_task, wakeup_cpu);
out_unlock:
__wakeup_reset(tr);
spin_unlock_irqrestore(&wakeup_lock, flags);
out:
atomic_dec(&tr->data[cpu]->disabled);
}
static notrace void
sched_switch_callback(void *probe_data, void *call_data,
const char *format, va_list *args)
{
struct task_struct *prev;
struct task_struct *next;
struct rq *__rq;
/* skip prev_pid %d next_pid %d prev_state %ld */
(void)va_arg(*args, int);
(void)va_arg(*args, int);
(void)va_arg(*args, long);
__rq = va_arg(*args, typeof(__rq));
prev = va_arg(*args, typeof(prev));
next = va_arg(*args, typeof(next));
tracing_record_cmdline(prev);
/*
* If tracer_switch_func only points to the local
* switch func, it still needs the ptr passed to it.
*/
wakeup_sched_switch(probe_data, __rq, prev, next);
}
static void __wakeup_reset(struct trace_array *tr)
{
struct trace_array_cpu *data;
int cpu;
assert_spin_locked(&wakeup_lock);
for_each_possible_cpu(cpu) {
data = tr->data[cpu];
tracing_reset(data);
}
wakeup_cpu = -1;
wakeup_prio = -1;
if (wakeup_task)
put_task_struct(wakeup_task);
wakeup_task = NULL;
}
static void wakeup_reset(struct trace_array *tr)
{
unsigned long flags;
spin_lock_irqsave(&wakeup_lock, flags);
__wakeup_reset(tr);
spin_unlock_irqrestore(&wakeup_lock, flags);
}
static void
wakeup_check_start(struct trace_array *tr, struct task_struct *p,
struct task_struct *curr)
{
int cpu = smp_processor_id();
unsigned long flags;
long disabled;
if (likely(!rt_task(p)) ||
p->prio >= wakeup_prio ||
p->prio >= curr->prio)
return;
disabled = atomic_inc_return(&tr->data[cpu]->disabled);
if (unlikely(disabled != 1))
goto out;
/* interrupts should be off from try_to_wake_up */
spin_lock(&wakeup_lock);
/* check for races. */
if (!tracer_enabled || p->prio >= wakeup_prio)
goto out_locked;
/* reset the trace */
__wakeup_reset(tr);
wakeup_cpu = task_cpu(p);
wakeup_prio = p->prio;
wakeup_task = p;
get_task_struct(wakeup_task);
local_save_flags(flags);
tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
trace_function(tr, tr->data[wakeup_cpu],
CALLER_ADDR1, CALLER_ADDR2, flags);
out_locked:
spin_unlock(&wakeup_lock);
out:
atomic_dec(&tr->data[cpu]->disabled);
}
static notrace void
wake_up_callback(void *probe_data, void *call_data,
const char *format, va_list *args)
{
struct trace_array **ptr = probe_data;
struct trace_array *tr = *ptr;
struct task_struct *curr;
struct task_struct *task;
struct rq *__rq;
if (likely(!tracer_enabled))
return;
/* Skip pid %d state %ld */
(void)va_arg(*args, int);
(void)va_arg(*args, long);
/* now get the meat: "rq %p task %p rq->curr %p" */
__rq = va_arg(*args, typeof(__rq));
task = va_arg(*args, typeof(task));
curr = va_arg(*args, typeof(curr));
tracing_record_cmdline(task);
tracing_record_cmdline(curr);
wakeup_check_start(tr, task, curr);
}
static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;
ret = marker_probe_register("kernel_sched_wakeup",
"pid %d state %ld ## rq %p task %p rq->curr %p",
wake_up_callback,
&wakeup_trace);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup\n");
return;
}
ret = marker_probe_register("kernel_sched_wakeup_new",
"pid %d state %ld ## rq %p task %p rq->curr %p",
wake_up_callback,
&wakeup_trace);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
ret = marker_probe_register("kernel_sched_schedule",
"prev_pid %d next_pid %d prev_state %ld "
"## rq %p prev %p next %p",
sched_switch_callback,
&wakeup_trace);
if (ret) {
pr_info("sched trace: Couldn't add marker"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
wakeup_reset(tr);
/*
* Don't let the tracer_enabled = 1 show up before
* the wakeup_task is reset. This may be overkill since
* wakeup_reset does a spin_unlock after setting the
* wakeup_task to NULL, but I want to be safe.
* This is a slow path anyway.
*/
smp_wmb();
tracer_enabled = 1;
register_ftrace_function(&trace_ops);
return;
fail_deprobe_wake_new:
marker_probe_unregister("kernel_sched_wakeup_new",
wake_up_callback,
&wakeup_trace);
fail_deprobe:
marker_probe_unregister("kernel_sched_wakeup",
wake_up_callback,
&wakeup_trace);
}
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
marker_probe_unregister("kernel_sched_schedule",
sched_switch_callback,
&wakeup_trace);
marker_probe_unregister("kernel_sched_wakeup_new",
wake_up_callback,
&wakeup_trace);
marker_probe_unregister("kernel_sched_wakeup",
wake_up_callback,
&wakeup_trace);
}
static void wakeup_tracer_init(struct trace_array *tr)
{
wakeup_trace = tr;
if (tr->ctrl)
start_wakeup_tracer(tr);
}
static void wakeup_tracer_reset(struct trace_array *tr)
{
if (tr->ctrl) {
stop_wakeup_tracer(tr);
/* make sure we put back any tasks we are tracing */
wakeup_reset(tr);
}
}
static void wakeup_tracer_ctrl_update(struct trace_array *tr)
{
if (tr->ctrl)
start_wakeup_tracer(tr);
else
stop_wakeup_tracer(tr);
}
static void wakeup_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
if (iter->tr->ctrl)
stop_wakeup_tracer(iter->tr);
}
static void wakeup_tracer_close(struct trace_iterator *iter)
{
/* forget about any processes we were recording */
if (iter->tr->ctrl)
start_wakeup_tracer(iter->tr);
}
static struct tracer wakeup_tracer __read_mostly =
{
.name = "wakeup",
.init = wakeup_tracer_init,
.reset = wakeup_tracer_reset,
.open = wakeup_tracer_open,
.close = wakeup_tracer_close,
.ctrl_update = wakeup_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
};
__init static int init_wakeup_tracer(void)
{
int ret;
ret = register_tracer(&wakeup_tracer);
if (ret)
return ret;
return 0;
}
device_initcall(init_wakeup_tracer);

View file

@ -0,0 +1,540 @@
/* Include in trace.c */
#include <linux/kthread.h>
#include <linux/delay.h>
static inline int trace_valid_entry(struct trace_entry *entry)
{
switch (entry->type) {
case TRACE_FN:
case TRACE_CTX:
case TRACE_WAKE:
case TRACE_STACK:
case TRACE_SPECIAL:
return 1;
}
return 0;
}
static int
trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
{
struct trace_entry *entries;
struct page *page;
int idx = 0;
int i;
BUG_ON(list_empty(&data->trace_pages));
page = list_entry(data->trace_pages.next, struct page, lru);
entries = page_address(page);
check_pages(data);
if (head_page(data) != entries)
goto failed;
/*
* The starting trace buffer always has valid elements,
* if any element exists.
*/
entries = head_page(data);
for (i = 0; i < tr->entries; i++) {
if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
printk(KERN_CONT ".. invalid entry %d ",
entries[idx].type);
goto failed;
}
idx++;
if (idx >= ENTRIES_PER_PAGE) {
page = virt_to_page(entries);
if (page->lru.next == &data->trace_pages) {
if (i != tr->entries - 1) {
printk(KERN_CONT ".. entries buffer mismatch");
goto failed;
}
} else {
page = list_entry(page->lru.next, struct page, lru);
entries = page_address(page);
}
idx = 0;
}
}
page = virt_to_page(entries);
if (page->lru.next != &data->trace_pages) {
printk(KERN_CONT ".. too many entries");
goto failed;
}
return 0;
failed:
/* disable tracing */
tracing_disabled = 1;
printk(KERN_CONT ".. corrupted trace buffer .. ");
return -1;
}
/*
* Test the trace buffer to see if all the elements
* are still sane.
*/
static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
{
unsigned long flags, cnt = 0;
int cpu, ret = 0;
/* Don't allow flipping of max traces now */
raw_local_irq_save(flags);
__raw_spin_lock(&ftrace_max_lock);
for_each_possible_cpu(cpu) {
if (!head_page(tr->data[cpu]))
continue;
cnt += tr->data[cpu]->trace_idx;
ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
if (ret)
break;
}
__raw_spin_unlock(&ftrace_max_lock);
raw_local_irq_restore(flags);
if (count)
*count = cnt;
return ret;
}
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
#define __STR(x) #x
#define STR(x) __STR(x)
/* Test dynamic code modification and ftrace filters */
int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
struct trace_array *tr,
int (*func)(void))
{
unsigned long count;
int ret;
int save_ftrace_enabled = ftrace_enabled;
int save_tracer_enabled = tracer_enabled;
char *func_name;
/* The ftrace test PASSED */
printk(KERN_CONT "PASSED\n");
pr_info("Testing dynamic ftrace: ");
/* enable tracing, and record the filter function */
ftrace_enabled = 1;
tracer_enabled = 1;
/* passed in by parameter to fool gcc from optimizing */
func();
/* update the records */
ret = ftrace_force_update();
if (ret) {
printk(KERN_CONT ".. ftraced failed .. ");
return ret;
}
/*
* Some archs *cough*PowerPC*cough* add charachters to the
* start of the function names. We simply put a '*' to
* accomodate them.
*/
func_name = "*" STR(DYN_FTRACE_TEST_NAME);
/* filter only on our function */
ftrace_set_filter(func_name, strlen(func_name), 1);
/* enable tracing */
tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* we should have nothing in the buffer */
ret = trace_test_buffer(tr, &count);
if (ret)
goto out;
if (count) {
ret = -1;
printk(KERN_CONT ".. filter did not filter .. ");
goto out;
}
/* call our function again */
func();
/* sleep again */
msleep(100);
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
/* we should only have one item */
if (!ret && count != 1) {
printk(KERN_CONT ".. filter failed count=%ld ..", count);
ret = -1;
goto out;
}
out:
ftrace_enabled = save_ftrace_enabled;
tracer_enabled = save_tracer_enabled;
/* Enable tracing on all functions again */
ftrace_set_filter(NULL, 0, 1);
return ret;
}
#else
# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
#endif /* CONFIG_DYNAMIC_FTRACE */
/*
* Simple verification test of ftrace function tracer.
* Enable ftrace, sleep 1/10 second, and then read the trace
* buffer to see if all is in order.
*/
int
trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
{
unsigned long count;
int ret;
int save_ftrace_enabled = ftrace_enabled;
int save_tracer_enabled = tracer_enabled;
/* make sure msleep has been recorded */
msleep(1);
/* force the recorded functions to be traced */
ret = ftrace_force_update();
if (ret) {
printk(KERN_CONT ".. ftraced failed .. ");
return ret;
}
/* start the tracing */
ftrace_enabled = 1;
tracer_enabled = 1;
tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
goto out;
}
ret = trace_selftest_startup_dynamic_tracing(trace, tr,
DYN_FTRACE_TEST_NAME);
out:
ftrace_enabled = save_ftrace_enabled;
tracer_enabled = save_tracer_enabled;
/* kill ftrace totally if we failed */
if (ret)
ftrace_kill();
return ret;
}
#endif /* CONFIG_FTRACE */
#ifdef CONFIG_IRQSOFF_TRACER
int
trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
{
unsigned long save_max = tracing_max_latency;
unsigned long count;
int ret;
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
/* disable interrupts for a bit */
local_irq_disable();
udelay(100);
local_irq_enable();
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
}
tracing_max_latency = save_max;
return ret;
}
#endif /* CONFIG_IRQSOFF_TRACER */
#ifdef CONFIG_PREEMPT_TRACER
int
trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
{
unsigned long save_max = tracing_max_latency;
unsigned long count;
int ret;
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
/* disable preemption for a bit */
preempt_disable();
udelay(100);
preempt_enable();
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
}
tracing_max_latency = save_max;
return ret;
}
#endif /* CONFIG_PREEMPT_TRACER */
#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
int
trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
{
unsigned long save_max = tracing_max_latency;
unsigned long count;
int ret;
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
/* disable preemption and interrupts for a bit */
preempt_disable();
local_irq_disable();
udelay(100);
preempt_enable();
/* reverse the order of preempt vs irqs */
local_irq_enable();
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (ret)
goto out;
ret = trace_test_buffer(&max_tr, &count);
if (ret)
goto out;
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
goto out;
}
/* do the test by disabling interrupts first this time */
tracing_max_latency = 0;
tr->ctrl = 1;
trace->ctrl_update(tr);
preempt_disable();
local_irq_disable();
udelay(100);
preempt_enable();
/* reverse the order of preempt vs irqs */
local_irq_enable();
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (ret)
goto out;
ret = trace_test_buffer(&max_tr, &count);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
goto out;
}
out:
trace->reset(tr);
tracing_max_latency = save_max;
return ret;
}
#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
#ifdef CONFIG_SCHED_TRACER
static int trace_wakeup_test_thread(void *data)
{
/* Make this a RT thread, doesn't need to be too high */
struct sched_param param = { .sched_priority = 5 };
struct completion *x = data;
sched_setscheduler(current, SCHED_FIFO, &param);
/* Make it know we have a new prio */
complete(x);
/* now go to sleep and let the test wake us up */
set_current_state(TASK_INTERRUPTIBLE);
schedule();
/* we are awake, now wait to disappear */
while (!kthread_should_stop()) {
/*
* This is an RT task, do short sleeps to let
* others run.
*/
msleep(100);
}
return 0;
}
int
trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
{
unsigned long save_max = tracing_max_latency;
struct task_struct *p;
struct completion isrt;
unsigned long count;
int ret;
init_completion(&isrt);
/* create a high prio thread */
p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
if (IS_ERR(p)) {
printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
return -1;
}
/* make sure the thread is running at an RT prio */
wait_for_completion(&isrt);
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
/* sleep to let the RT thread sleep too */
msleep(100);
/*
* Yes this is slightly racy. It is possible that for some
* strange reason that the RT thread we created, did not
* call schedule for 100ms after doing the completion,
* and we do a wakeup on a task that already is awake.
* But that is extremely unlikely, and the worst thing that
* happens in such a case, is that we disable tracing.
* Honestly, if this race does happen something is horrible
* wrong with the system.
*/
wake_up_process(p);
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
tracing_max_latency = save_max;
/* kill the thread */
kthread_stop(p);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
}
return ret;
}
#endif /* CONFIG_SCHED_TRACER */
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
int
trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
{
unsigned long count;
int ret;
/* start the tracing */
tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
tr->ctrl = 0;
trace->ctrl_update(tr);
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
}
return ret;
}
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */

View file

@ -0,0 +1,7 @@
#include "trace.h"
int DYN_FTRACE_TEST_NAME(void)
{
/* used to call mcount */
return 0;
}

View file

@ -634,6 +634,8 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
source kernel/trace/Kconfig
config PROVIDE_OHCI1394_DMA_INIT
bool "Remote debugging over FireWire early on boot"
depends on PCI && X86

View file

@ -8,6 +8,15 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
sha1.o irq_regs.o reciprocal_div.o argv_split.o \
proportions.o prio_heap.o ratelimit.o
ifdef CONFIG_FTRACE
# Do not profile string.o, since it may be used in early boot or vdso
CFLAGS_REMOVE_string.o = -pg
# Also do not profile any debug utilities
CFLAGS_REMOVE_spinlock_debug.o = -pg
CFLAGS_REMOVE_list_debug.o = -pg
CFLAGS_REMOVE_debugobjects.o = -pg
endif
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o

View file

@ -7,7 +7,7 @@
#include <linux/kallsyms.h>
#include <linux/sched.h>
unsigned int debug_smp_processor_id(void)
notrace unsigned int debug_smp_processor_id(void)
{
unsigned long preempt_count = preempt_count();
int this_cpu = raw_smp_processor_id();
@ -37,7 +37,7 @@ unsigned int debug_smp_processor_id(void)
/*
* Avoid recursion:
*/
preempt_disable();
preempt_disable_notrace();
if (!printk_ratelimit())
goto out_enable;
@ -49,7 +49,7 @@ unsigned int debug_smp_processor_id(void)
dump_stack();
out_enable:
preempt_enable_no_resched();
preempt_enable_no_resched_notrace();
out:
return this_cpu;
}

View file

@ -126,8 +126,6 @@ static void background_writeout(unsigned long _min_pages);
static struct prop_descriptor vm_completions;
static struct prop_descriptor vm_dirties;
static unsigned long determine_dirtyable_memory(void);
/*
* couple the period to the dirty_ratio:
*
@ -347,7 +345,13 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
#endif
}
static unsigned long determine_dirtyable_memory(void)
/**
* determine_dirtyable_memory - amount of memory that may be used
*
* Returns the numebr of pages that can currently be freed and used
* by the kernel for direct mappings.
*/
unsigned long determine_dirtyable_memory(void)
{
unsigned long x;

View file

@ -96,7 +96,8 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))"
modname_flags = $(if $(filter 1,$(words $(modname))),\
-D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))")
_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o)
orig_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o)
_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags))
_a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o)
_cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F))