Merge branch 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (574 commits)
  perf_counter: Turn off by default
  perf_counter: Add counter->id to the throttle event
  perf_counter: Better align code
  perf_counter: Rename L2 to LL cache
  perf_counter: Standardize event names
  perf_counter: Rename enums
  perf_counter tools: Clean up u64 usage
  perf_counter: Rename perf_counter_limit sysctl
  perf_counter: More paranoia settings
  perf_counter: powerpc: Implement generalized cache events for POWER processors
  perf_counters: powerpc: Add support for POWER7 processors
  perf_counter: Accurate period data
  perf_counter: Introduce struct for sample data
  perf_counter tools: Normalize data using per sample period data
  perf_counter: Annotate exit ctx recursion
  perf_counter tools: Propagate signals properly
  perf_counter tools: Small frequency related fixes
  perf_counter: More aggressive frequency adjustment
  perf_counter/x86: Fix the model number of Intel Core2 processors
  perf_counter, x86: Correct some event and umask values for Intel processors
  ...
This commit is contained in:
Linus Torvalds 2009-06-11 14:01:07 -07:00
commit 8a1ca8cedd
138 changed files with 27407 additions and 86 deletions

View file

@ -4403,6 +4403,16 @@ S: Maintained
F: include/linux/delayacct.h
F: kernel/delayacct.c
PERFORMANCE COUNTER SUBSYSTEM
P: Peter Zijlstra
M: a.p.zijlstra@chello.nl
P: Paul Mackerras
M: paulus@samba.org
P: Ingo Molnar
M: mingo@elte.hu
L: linux-kernel@vger.kernel.org
S: Supported
PERSONALITY HANDLING
P: Christoph Hellwig
M: hch@infradead.org

View file

@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags)
*/
struct irq_chip;
#ifdef CONFIG_PERF_COUNTERS
static inline unsigned long test_perf_counter_pending(void)
{
unsigned long x;
asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, perf_counter_pending)));
return x;
}
static inline void set_perf_counter_pending(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (1),
"i" (offsetof(struct paca_struct, perf_counter_pending)));
}
static inline void clear_perf_counter_pending(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (0),
"i" (offsetof(struct paca_struct, perf_counter_pending)));
}
extern void perf_counter_do_pending(void);
#else
static inline unsigned long test_perf_counter_pending(void)
{
return 0;
}
static inline void set_perf_counter_pending(void) {}
static inline void clear_perf_counter_pending(void) {}
static inline void perf_counter_do_pending(void) {}
#endif /* CONFIG_PERF_COUNTERS */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */

View file

@ -99,6 +99,7 @@ struct paca_struct {
u8 soft_enabled; /* irq soft-enable flag */
u8 hard_enabled; /* set if irqs are enabled in MSR */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 perf_counter_pending; /* PM interrupt while soft-disabled */
/* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */

View file

@ -0,0 +1,98 @@
/*
* Performance counter support - PowerPC-specific definitions.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#define MAX_HWCOUNTERS 8
#define MAX_EVENT_ALTERNATIVES 8
#define MAX_LIMITED_HWCOUNTERS 2
/*
* This struct provides the constants and functions needed to
* describe the PMU on a particular POWER-family CPU.
*/
struct power_pmu {
int n_counter;
int max_alternatives;
u64 add_fields;
u64 test_adder;
int (*compute_mmcr)(u64 events[], int n_ev,
unsigned int hwc[], u64 mmcr[]);
int (*get_constraint)(u64 event, u64 *mskp, u64 *valp);
int (*get_alternatives)(u64 event, unsigned int flags,
u64 alt[]);
void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
int (*limited_pmc_event)(u64 event);
u32 flags;
int n_generic;
int *generic_events;
int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
};
extern struct power_pmu *ppmu;
/*
* Values for power_pmu.flags
*/
#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */
#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */
/*
* Values for flags to get_alternatives()
*/
#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */
#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
/*
* The power_pmu.get_constraint function returns a 64-bit value and
* a 64-bit mask that express the constraints between this event and
* other events.
*
* The value and mask are divided up into (non-overlapping) bitfields
* of three different types:
*
* Select field: this expresses the constraint that some set of bits
* in MMCR* needs to be set to a specific value for this event. For a
* select field, the mask contains 1s in every bit of the field, and
* the value contains a unique value for each possible setting of the
* MMCR* bits. The constraint checking code will ensure that two events
* that set the same field in their masks have the same value in their
* value dwords.
*
* Add field: this expresses the constraint that there can be at most
* N events in a particular class. A field of k bits can be used for
* N <= 2^(k-1) - 1. The mask has the most significant bit of the field
* set (and the other bits 0), and the value has only the least significant
* bit of the field set. In addition, the 'add_fields' and 'test_adder'
* in the struct power_pmu for this processor come into play. The
* add_fields value contains 1 in the LSB of the field, and the
* test_adder contains 2^(k-1) - 1 - N in the field.
*
* NAND field: this expresses the constraint that you may not have events
* in all of a set of classes. (For example, on PPC970, you can't select
* events from the FPU, ISU and IDU simultaneously, although any two are
* possible.) For N classes, the field is N+1 bits wide, and each class
* is assigned one bit from the least-significant N bits. The mask has
* only the most-significant bit set, and the value has only the bit
* for the event's class set. The test_adder has the least significant
* bit set in the field.
*
* If an event is not subject to the constraint expressed by a particular
* field, then it will have 0 in both the mask and value for that field.
*/

View file

@ -492,11 +492,13 @@
#define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */
#define SPRN_MMCR1 798
#define SPRN_MMCRA 0x312
#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
#define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */
#define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */
#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
#define MMCRA_SLOT_SHIFT 24
#define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
#define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */
#define POWER6_MMCRA_SIHV 0x0000040000000000ULL
#define POWER6_MMCRA_SIPR 0x0000020000000000ULL
#define POWER6_MMCRA_THRM 0x00000020UL

View file

@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1)
SYSCALL_SPU(dup3)
SYSCALL_SPU(pipe2)
SYSCALL(inotify_init1)
SYSCALL(ni_syscall)
SYSCALL_SPU(perf_counter_open)
COMPAT_SYS_SPU(preadv)
COMPAT_SYS_SPU(pwritev)

View file

@ -341,6 +341,7 @@
#define __NR_dup3 316
#define __NR_pipe2 317
#define __NR_inotify_init1 318
#define __NR_perf_counter_open 319
#define __NR_preadv 320
#define __NR_pwritev 321

View file

@ -94,6 +94,9 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \
power5-pmu.o power5+-pmu.o power6-pmu.o \
power7-pmu.o
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o

View file

@ -131,6 +131,7 @@ int main(void)
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));

View file

@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
2:
TRACE_AND_RESTORE_IRQ(r5);
#ifdef CONFIG_PERF_COUNTERS
/* check paca->perf_counter_pending if we're enabling ints */
lbz r3,PACAPERFPEND(r13)
and. r3,r3,r5
beq 27f
bl .perf_counter_do_pending
27:
#endif /* CONFIG_PERF_COUNTERS */
/* extract EE bit and use it to restore paca->hard_enabled */
ld r3,_MSR(r1)
rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */

View file

@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en)
iseries_handle_interrupts();
}
if (test_perf_counter_pending()) {
clear_perf_counter_pending();
perf_counter_do_pending();
}
/*
* if (get_paca()->hard_enabled) return;
* But again we need to take care that gcc gets hard_enabled directly

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,598 @@
/*
* Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER4
*/
#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_LOWER_SH 6
#define PM_LOWER_MSK 1
#define PM_LOWER_MSKS 0x40
#define PM_BYTE_SH 4 /* Byte number of event bus to use */
#define PM_BYTE_MSK 3
#define PM_PMCSEL_MSK 7
/*
* Unit code values
*/
#define PM_FPU 1
#define PM_ISU1 2
#define PM_IFU 3
#define PM_IDU0 4
#define PM_ISU1_ALT 6
#define PM_ISU2 7
#define PM_IFU_ALT 8
#define PM_LSU0 9
#define PM_LSU1 0xc
#define PM_GPS 0xf
/*
* Bits in MMCR0 for POWER4
*/
#define MMCR0_PMC1SEL_SH 8
#define MMCR0_PMC2SEL_SH 1
#define MMCR_PMCSEL_MSK 0x1f
/*
* Bits in MMCR1 for POWER4
*/
#define MMCR1_TTM0SEL_SH 62
#define MMCR1_TTC0SEL_SH 61
#define MMCR1_TTM1SEL_SH 59
#define MMCR1_TTC1SEL_SH 58
#define MMCR1_TTM2SEL_SH 56
#define MMCR1_TTC2SEL_SH 55
#define MMCR1_TTM3SEL_SH 53
#define MMCR1_TTC3SEL_SH 52
#define MMCR1_TTMSEL_MSK 3
#define MMCR1_TD_CP_DBG0SEL_SH 50
#define MMCR1_TD_CP_DBG1SEL_SH 48
#define MMCR1_TD_CP_DBG2SEL_SH 46
#define MMCR1_TD_CP_DBG3SEL_SH 44
#define MMCR1_DEBUG0SEL_SH 43
#define MMCR1_DEBUG1SEL_SH 42
#define MMCR1_DEBUG2SEL_SH 41
#define MMCR1_DEBUG3SEL_SH 40
#define MMCR1_PMC1_ADDER_SEL_SH 39
#define MMCR1_PMC2_ADDER_SEL_SH 38
#define MMCR1_PMC6_ADDER_SEL_SH 37
#define MMCR1_PMC5_ADDER_SEL_SH 36
#define MMCR1_PMC8_ADDER_SEL_SH 35
#define MMCR1_PMC7_ADDER_SEL_SH 34
#define MMCR1_PMC3_ADDER_SEL_SH 33
#define MMCR1_PMC4_ADDER_SEL_SH 32
#define MMCR1_PMC3SEL_SH 27
#define MMCR1_PMC4SEL_SH 22
#define MMCR1_PMC5SEL_SH 17
#define MMCR1_PMC6SEL_SH 12
#define MMCR1_PMC7SEL_SH 7
#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
static short mmcr1_adder_bits[8] = {
MMCR1_PMC1_ADDER_SEL_SH,
MMCR1_PMC2_ADDER_SEL_SH,
MMCR1_PMC3_ADDER_SEL_SH,
MMCR1_PMC4_ADDER_SEL_SH,
MMCR1_PMC5_ADDER_SEL_SH,
MMCR1_PMC6_ADDER_SEL_SH,
MMCR1_PMC7_ADDER_SEL_SH,
MMCR1_PMC8_ADDER_SEL_SH
};
/*
* Bits in MMCRA
*/
#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
* | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
* \SMPL ||\TTC3SEL
* |\TTC_IFU_SEL
* \TTM2SEL0
*
* SMPL - SAMPLE_ENABLE constraint
* 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
*
* UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
* 55: UC1 error 0x0080_0000_0000_0000
* 54: FPU events needed 0x0040_0000_0000_0000
* 53: ISU1 events needed 0x0020_0000_0000_0000
* 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
*
* UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
* 51: UC2 error 0x0008_0000_0000_0000
* 50: FPU events needed 0x0004_0000_0000_0000
* 49: IFU events needed 0x0002_0000_0000_0000
* 48: LSU0 events needed 0x0001_0000_0000_0000
*
* UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
* 47: UC3 error 0x8000_0000_0000
* 46: LSU0 events needed 0x4000_0000_0000
* 45: IFU events needed 0x2000_0000_0000
* 44: IDU0|ISU2 events needed 0x1000_0000_0000
* 43: ISU1 events needed 0x0800_0000_0000
*
* TTM2SEL0
* 42: 0 = IDU0 events needed
* 1 = ISU2 events needed 0x0400_0000_0000
*
* TTC_IFU_SEL
* 41: 0 = IFU.U events needed
* 1 = IFU.L events needed 0x0200_0000_0000
*
* TTC3SEL
* 40: 0 = LSU1.U events needed
* 1 = LSU1.L events needed 0x0100_0000_0000
*
* PS1
* 39: PS1 error 0x0080_0000_0000
* 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
*
* PS2
* 35: PS2 error 0x0008_0000_0000
* 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
*
* B0
* 28-31: Byte 0 event source 0xf000_0000
* 1 = FPU
* 2 = ISU1
* 3 = IFU
* 4 = IDU0
* 7 = ISU2
* 9 = LSU0
* c = LSU1
* f = GPS
*
* B1, B2, B3
* 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
*
* P8
* 15: P8 error 0x8000
* 14-15: Count of events needing PMC8
*
* P1..P7
* 0-13: Count of events needing PMC1..PMC7
*
* Note: this doesn't allow events using IFU.U to be combined with events
* using IFU.L, though that is feasible (using TTM0 and TTM2). However
* there are no listed events for IFU.L (they are debug events not
* verified for performance monitoring) so this shouldn't cause a
* problem.
*/
static struct unitinfo {
u64 value, mask;
int unit;
int lowerbit;
} p4_unitinfo[16] = {
[PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
[PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
[PM_ISU1_ALT] =
{ 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
[PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
[PM_IFU_ALT] =
{ 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
[PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
[PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
[PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
[PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
[PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
};
static unsigned char direct_marked_event[8] = {
(1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
(1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
(1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
(1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
(1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
(1<<3) | (1<<4) | (1<<5),
/* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
(1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
(1<<4), /* PMC8: PM_MRK_LSU_FIN */
};
/*
* Returns 1 if event counts things relating to marked instructions
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
*/
static int p4_marked_instr_event(u64 event)
{
int pmc, psel, unit, byte, bit;
unsigned int mask;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
psel = event & PM_PMCSEL_MSK;
if (pmc) {
if (direct_marked_event[pmc - 1] & (1 << psel))
return 1;
if (psel == 0) /* add events */
bit = (pmc <= 4)? pmc - 1: 8 - pmc;
else if (psel == 6) /* decode events */
bit = 4;
else
return 0;
} else
bit = psel;
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
mask = 0;
switch (unit) {
case PM_LSU1:
if (event & PM_LOWER_MSKS)
mask = 1 << 28; /* byte 7 bit 4 */
else
mask = 6 << 24; /* byte 3 bits 1 and 2 */
break;
case PM_LSU0:
/* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
mask = 0x083dff00;
}
return (mask >> (byte * 8 + bit)) & 1;
}
static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
int pmc, byte, unit, lower, sh;
u64 mask = 0, value = 0;
int grp = -1;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 8)
return -1;
sh = (pmc - 1) * 2;
mask |= 2 << sh;
value |= 1 << sh;
grp = ((pmc - 1) >> 1) & 1;
}
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
if (unit) {
lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
/*
* Bus events on bytes 0 and 2 can be counted
* on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
*/
if (!pmc)
grp = byte & 1;
if (!p4_unitinfo[unit].unit)
return -1;
mask |= p4_unitinfo[unit].mask;
value |= p4_unitinfo[unit].value;
sh = p4_unitinfo[unit].lowerbit;
if (sh > 1)
value |= (u64)lower << sh;
else if (lower != sh)
return -1;
unit = p4_unitinfo[unit].unit;
/* Set byte lane select field */
mask |= 0xfULL << (28 - 4 * byte);
value |= (u64)unit << (28 - 4 * byte);
}
if (grp == 0) {
/* increment PMC1/2/5/6 field */
mask |= 0x8000000000ull;
value |= 0x1000000000ull;
} else {
/* increment PMC3/4/7/8 field */
mask |= 0x800000000ull;
value |= 0x100000000ull;
}
/* Marked instruction events need sample_enable set */
if (p4_marked_instr_event(event)) {
mask |= 1ull << 56;
value |= 1ull << 56;
}
/* PMCSEL=6 decode events on byte 2 need sample_enable clear */
if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
mask |= 1ull << 56;
*maskp = mask;
*valp = value;
return 0;
}
static unsigned int ppc_inst_cmpl[] = {
0x1001, 0x4001, 0x6001, 0x7001, 0x8001
};
static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
int i, j, na;
alt[0] = event;
na = 1;
/* 2 possibilities for PM_GRP_DISP_REJECT */
if (event == 0x8003 || event == 0x0224) {
alt[1] = event ^ (0x8003 ^ 0x0224);
return 2;
}
/* 2 possibilities for PM_ST_MISS_L1 */
if (event == 0x0c13 || event == 0x0c23) {
alt[1] = event ^ (0x0c13 ^ 0x0c23);
return 2;
}
/* several possibilities for PM_INST_CMPL */
for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
if (event == ppc_inst_cmpl[i]) {
for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
if (j != i)
alt[na++] = ppc_inst_cmpl[j];
break;
}
}
return na;
}
static int p4_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[])
{
u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
unsigned int pmc, unit, byte, psel, lower;
unsigned int ttm, grp;
unsigned int pmc_inuse = 0;
unsigned int pmc_grp_use[2];
unsigned char busbyte[4];
unsigned char unituse[16];
unsigned int unitlower = 0;
int i;
if (n_ev > 8)
return -1;
/* First pass to count resource use */
pmc_grp_use[0] = pmc_grp_use[1] = 0;
memset(busbyte, 0, sizeof(busbyte));
memset(unituse, 0, sizeof(unituse));
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc_inuse & (1 << (pmc - 1)))
return -1;
pmc_inuse |= 1 << (pmc - 1);
/* count 1/2/5/6 vs 3/4/7/8 use */
++pmc_grp_use[((pmc - 1) >> 1) & 1];
}
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
if (unit) {
if (!pmc)
++pmc_grp_use[byte & 1];
if (unit == 6 || unit == 8)
/* map alt ISU1/IFU codes: 6->2, 8->3 */
unit = (unit >> 1) - 1;
if (busbyte[byte] && busbyte[byte] != unit)
return -1;
busbyte[byte] = unit;
lower <<= unit;
if (unituse[unit] && lower != (unitlower & lower))
return -1;
unituse[unit] = 1;
unitlower |= lower;
}
}
if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
return -1;
/*
* Assign resources and set multiplexer selects.
*
* Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
* Each TTMx can only select one unit, but since
* units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
* we have some choices.
*/
if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
unituse[6] = 1; /* Move 2 to 6 */
unituse[2] = 0;
}
if (unituse[3] & (unituse[1] | unituse[2])) {
unituse[8] = 1; /* Move 3 to 8 */
unituse[3] = 0;
unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
}
/* Check only one unit per TTMx */
if (unituse[1] + unituse[2] + unituse[3] > 1 ||
unituse[4] + unituse[6] + unituse[7] > 1 ||
unituse[8] + unituse[9] > 1 ||
(unituse[5] | unituse[10] | unituse[11] |
unituse[13] | unituse[14]))
return -1;
/* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;
/* Set TTCxSEL fields. */
if (unitlower & 0xe)
mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
if (unitlower & 0xf0)
mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
if (unitlower & 0xf00)
mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
if (unitlower & 0x7000)
mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
/* Set byte lane select fields. */
for (byte = 0; byte < 4; ++byte) {
unit = busbyte[byte];
if (!unit)
continue;
if (unit == 0xf) {
/* special case for GPS */
mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
} else {
if (!unituse[unit])
ttm = unit - 1; /* 2->1, 3->2 */
else
ttm = unit >> 2;
mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
}
}
/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
psel = event[i] & PM_PMCSEL_MSK;
if (!pmc) {
/* Bus event or 00xxx direct event (off or cycles) */
if (unit)
psel |= 0x10 | ((byte & 2) << 2);
for (pmc = 0; pmc < 8; ++pmc) {
if (pmc_inuse & (1 << pmc))
continue;
grp = (pmc >> 1) & 1;
if (unit) {
if (grp == (byte & 1))
break;
} else if (pmc_grp_use[grp] < 4) {
++pmc_grp_use[grp];
break;
}
}
pmc_inuse |= 1 << pmc;
} else {
/* Direct event */
--pmc;
if (psel == 0 && (byte & 2))
/* add events on higher-numbered bus */
mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
else if (psel == 6 && byte == 3)
/* seem to need to set sample_enable here */
mmcra |= MMCRA_SAMPLE_ENABLE;
psel |= 8;
}
if (pmc <= 1)
mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
else
mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
if (pmc == 7) /* PMC8 */
mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
hwc[i] = pmc;
if (p4_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
}
if (pmc_inuse & 1)
mmcr0 |= MMCR0_PMC1CE;
if (pmc_inuse & 0xfe)
mmcr0 |= MMCR0_PMCjCE;
mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
/* Return MMCRx values */
mmcr[0] = mmcr0;
mmcr[1] = mmcr1;
mmcr[2] = mmcra;
return 0;
}
static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
{
/*
* Setting the PMCxSEL field to 0 disables PMC x.
* (Note that pmc is 0-based here, not 1-based.)
*/
if (pmc <= 1) {
mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
} else {
mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
if (pmc == 7)
mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
}
}
static int p4_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 7,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
* Table of generalized cache-related events.
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x8c10, 0x3c10 },
[C(OP_WRITE)] = { 0x7c10, 0xc13 },
[C(OP_PREFETCH)] = { 0xc35, 0 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0xc34, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x904 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x900 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x330, 0x331 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
struct power_pmu power4_pmu = {
.n_counter = 8,
.max_alternatives = 5,
.add_fields = 0x0000001100005555ull,
.test_adder = 0x0011083300000000ull,
.compute_mmcr = p4_compute_mmcr,
.get_constraint = p4_get_constraint,
.get_alternatives = p4_get_alternatives,
.disable_pmc = p4_disable_pmc,
.n_generic = ARRAY_SIZE(p4_generic_events),
.generic_events = p4_generic_events,
.cache_events = &power4_cache_events,
};

View file

@ -0,0 +1,671 @@
/*
* Performance counter support for POWER5+/++ (not POWER5) processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
*/
#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_BYTE_SH 12 /* Byte number of event bus to use */
#define PM_BYTE_MSK 7
#define PM_GRS_SH 8 /* Storage subsystem mux select */
#define PM_GRS_MSK 7
#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
#define PM_PMCSEL_MSK 0x7f
/* Values in PM_UNIT field */
#define PM_FPU 0
#define PM_ISU0 1
#define PM_IFU 2
#define PM_ISU1 3
#define PM_IDU 4
#define PM_ISU0_ALT 6
#define PM_GRS 7
#define PM_LSU0 8
#define PM_LSU1 0xc
#define PM_LASTUNIT 0xc
/*
* Bits in MMCR1 for POWER5+
*/
#define MMCR1_TTM0SEL_SH 62
#define MMCR1_TTM1SEL_SH 60
#define MMCR1_TTM2SEL_SH 58
#define MMCR1_TTM3SEL_SH 56
#define MMCR1_TTMSEL_MSK 3
#define MMCR1_TD_CP_DBG0SEL_SH 54
#define MMCR1_TD_CP_DBG1SEL_SH 52
#define MMCR1_TD_CP_DBG2SEL_SH 50
#define MMCR1_TD_CP_DBG3SEL_SH 48
#define MMCR1_GRS_L2SEL_SH 46
#define MMCR1_GRS_L2SEL_MSK 3
#define MMCR1_GRS_L3SEL_SH 44
#define MMCR1_GRS_L3SEL_MSK 3
#define MMCR1_GRS_MCSEL_SH 41
#define MMCR1_GRS_MCSEL_MSK 7
#define MMCR1_GRS_FABSEL_SH 39
#define MMCR1_GRS_FABSEL_MSK 3
#define MMCR1_PMC1_ADDER_SEL_SH 35
#define MMCR1_PMC2_ADDER_SEL_SH 34
#define MMCR1_PMC3_ADDER_SEL_SH 33
#define MMCR1_PMC4_ADDER_SEL_SH 32
#define MMCR1_PMC1SEL_SH 25
#define MMCR1_PMC2SEL_SH 17
#define MMCR1_PMC3SEL_SH 9
#define MMCR1_PMC4SEL_SH 1
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
* NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
*
* NC - number of counters
* 51: NC error 0x0008_0000_0000_0000
* 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
*
* G0..G3 - GRS mux constraints
* 46-47: GRS_L2SEL value
* 44-45: GRS_L3SEL value
* 41-44: GRS_MCSEL value
* 39-40: GRS_FABSEL value
* Note that these match up with their bit positions in MMCR1
*
* T0 - TTM0 constraint
* 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
*
* T1 - TTM1 constraint
* 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
*
* UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
* 33: UC3 error 0x02_0000_0000
* 32: FPU|IFU|ISU1 events needed 0x01_0000_0000
* 31: ISU0 events needed 0x01_8000_0000
* 30: IDU|GRS events needed 0x00_4000_0000
*
* B0
* 24-27: Byte 0 event source 0x0f00_0000
* Encoding as for the event code
*
* B1, B2, B3
* 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
*
* P6
* 11: P6 error 0x800
* 10-11: Count of events needing PMC6
*
* P1..P5
* 0-9: Count of events needing PMC1..PMC5
*/
static const int grsel_shift[8] = {
MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
};
/* Masks and values for using events from the various units */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0x3200000000ull, 0x0100000000ull },
[PM_ISU0] = { 0x0200000000ull, 0x0080000000ull },
[PM_ISU1] = { 0x3200000000ull, 0x3100000000ull },
[PM_IFU] = { 0x3200000000ull, 0x2100000000ull },
[PM_IDU] = { 0x0e00000000ull, 0x0040000000ull },
[PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull },
};
static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
int pmc, byte, unit, sh;
int bit, fmask;
u64 mask = 0, value = 0;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 6)
return -1;
sh = (pmc - 1) * 2;
mask |= 2 << sh;
value |= 1 << sh;
if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
return -1;
}
if (event & PM_BUSEVENT_MSK) {
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
if (unit > PM_LASTUNIT)
return -1;
if (unit == PM_ISU0_ALT)
unit = PM_ISU0;
mask |= unit_cons[unit][0];
value |= unit_cons[unit][1];
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
if (byte >= 4) {
if (unit != PM_LSU1)
return -1;
/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
++unit;
byte &= 3;
}
if (unit == PM_GRS) {
bit = event & 7;
fmask = (bit == 6)? 7: 3;
sh = grsel_shift[bit];
mask |= (u64)fmask << sh;
value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
}
/* Set byte lane select field */
mask |= 0xfULL << (24 - 4 * byte);
value |= (u64)unit << (24 - 4 * byte);
}
if (pmc < 5) {
/* need a counter from PMC1-4 set */
mask |= 0x8000000000000ull;
value |= 0x1000000000000ull;
}
*maskp = mask;
*valp = value;
return 0;
}
static int power5p_limited_pmc_event(u64 event)
{
int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
return pmc == 5 || pmc == 6;
}
#define MAX_ALT 3 /* at most 3 alternatives for any event */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */
{ 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
{ 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */
{ 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */
{ 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
{ 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
{ 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
{ 0x100005, 0x600005 }, /* PM_RUN_CYC */
{ 0x100009, 0x200009 }, /* PM_INST_CMPL */
{ 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
{ 0x300009, 0x400009 }, /* PM_INST_DISP */
};
/*
* Scan the alternatives table for a match and return the
* index into the alternatives table if found, else -1.
*/
static int find_alternative(unsigned int event)
{
int i, j;
for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
if (event < event_alternatives[i][0])
break;
for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
if (event == event_alternatives[i][j])
return i;
}
return -1;
}
static const unsigned char bytedecode_alternatives[4][4] = {
/* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
/* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
/* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
/* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
};
/*
* Some direct events for decodes of event bus byte 3 have alternative
* PMCSEL values on other counters. This returns the alternative
* event code for those that do, or -1 otherwise. This also handles
* alternative PCMSEL values for add events.
*/
static s64 find_alternative_bdecode(u64 event)
{
int pmc, altpmc, pp, j;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc == 0 || pmc > 4)
return -1;
altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
pp = event & PM_PMCSEL_MSK;
for (j = 0; j < 4; ++j) {
if (bytedecode_alternatives[pmc - 1][j] == pp) {
return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
(altpmc << PM_PMC_SH) |
bytedecode_alternatives[altpmc - 1][j];
}
}
/* new decode alternatives for power5+ */
if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
/* alternative add event encodings */
if (pp == 0x10 || pp == 0x28)
return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
(altpmc << PM_PMC_SH);
return -1;
}
static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
int i, j, nalt = 1;
int nlim;
s64 ae;
alt[0] = event;
nalt = 1;
nlim = power5p_limited_pmc_event(event);
i = find_alternative(event);
if (i >= 0) {
for (j = 0; j < MAX_ALT; ++j) {
ae = event_alternatives[i][j];
if (ae && ae != event)
alt[nalt++] = ae;
nlim += power5p_limited_pmc_event(ae);
}
} else {
ae = find_alternative_bdecode(event);
if (ae > 0)
alt[nalt++] = ae;
}
if (flags & PPMU_ONLY_COUNT_RUN) {
/*
* We're only counting in RUN state,
* so PM_CYC is equivalent to PM_RUN_CYC
* and PM_INST_CMPL === PM_RUN_INST_CMPL.
* This doesn't include alternatives that don't provide
* any extra flexibility in assigning PMCs (e.g.
* 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
* Note that even with these additional alternatives
* we never end up with more than 3 alternatives for any event.
*/
j = nalt;
for (i = 0; i < nalt; ++i) {
switch (alt[i]) {
case 0xf: /* PM_CYC */
alt[j++] = 0x600005; /* PM_RUN_CYC */
++nlim;
break;
case 0x600005: /* PM_RUN_CYC */
alt[j++] = 0xf;
break;
case 0x100009: /* PM_INST_CMPL */
alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
++nlim;
break;
case 0x500009: /* PM_RUN_INST_CMPL */
alt[j++] = 0x100009; /* PM_INST_CMPL */
alt[j++] = 0x200009;
break;
}
}
nalt = j;
}
if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
/* remove the limited PMC events */
j = 0;
for (i = 0; i < nalt; ++i) {
if (!power5p_limited_pmc_event(alt[i])) {
alt[j] = alt[i];
++j;
}
}
nalt = j;
} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
/* remove all but the limited PMC events */
j = 0;
for (i = 0; i < nalt; ++i) {
if (power5p_limited_pmc_event(alt[i])) {
alt[j] = alt[i];
++j;
}
}
nalt = j;
}
return nalt;
}
/*
* Map of which direct events on which PMCs are marked instruction events.
* Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
* Bit 0 is set if it is marked for all PMCs.
* The 0x80 bit indicates a byte decode PMCSEL value.
*/
static unsigned char direct_event_is_marked[0x28] = {
0, /* 00 */
0x1f, /* 01 PM_IOPS_CMPL */
0x2, /* 02 PM_MRK_GRP_DISP */
0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
0, /* 04 */
0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
0x80, /* 06 */
0x80, /* 07 */
0, 0, 0,/* 08 - 0a */
0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
0, /* 0c */
0x80, /* 0d */
0x80, /* 0e */
0, /* 0f */
0, /* 10 */
0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
0, /* 12 */
0x10, /* 13 PM_MRK_GRP_CMPL */
0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
0x2, /* 15 PM_MRK_GRP_ISSUED */
0x80, /* 16 */
0x80, /* 17 */
0, 0, 0, 0, 0,
0x80, /* 1d */
0x80, /* 1e */
0, /* 1f */
0x80, /* 20 */
0x80, /* 21 */
0x80, /* 22 */
0x80, /* 23 */
0x80, /* 24 */
0x80, /* 25 */
0x80, /* 26 */
0x80, /* 27 */
};
/*
* Returns 1 if event counts things relating to marked instructions
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
*/
static int power5p_marked_instr_event(u64 event)
{
int pmc, psel;
int bit, byte, unit;
u32 mask;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
psel = event & PM_PMCSEL_MSK;
if (pmc >= 5)
return 0;
bit = -1;
if (psel < sizeof(direct_event_is_marked)) {
if (direct_event_is_marked[psel] & (1 << pmc))
return 1;
if (direct_event_is_marked[psel] & 0x80)
bit = 4;
else if (psel == 0x08)
bit = pmc - 1;
else if (psel == 0x10)
bit = 4 - pmc;
else if (psel == 0x1b && (pmc == 1 || pmc == 3))
bit = 4;
} else if ((psel & 0x48) == 0x40) {
bit = psel & 7;
} else if (psel == 0x28) {
bit = pmc - 1;
} else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
bit = 4;
}
if (!(event & PM_BUSEVENT_MSK) || bit == -1)
return 0;
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
if (unit == PM_LSU0) {
/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
mask = 0x5dff00;
} else if (unit == PM_LSU1 && byte >= 4) {
byte -= 4;
/* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
mask = 0x5f11c000;
} else
return 0;
return (mask >> (byte * 8 + bit)) & 1;
}
static int power5p_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[])
{
u64 mmcr1 = 0;
u64 mmcra = 0;
unsigned int pmc, unit, byte, psel;
unsigned int ttm;
int i, isbus, bit, grsel;
unsigned int pmc_inuse = 0;
unsigned char busbyte[4];
unsigned char unituse[16];
int ttmuse;
if (n_ev > 6)
return -1;
/* First pass to count resource use */
memset(busbyte, 0, sizeof(busbyte));
memset(unituse, 0, sizeof(unituse));
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 6)
return -1;
if (pmc_inuse & (1 << (pmc - 1)))
return -1;
pmc_inuse |= 1 << (pmc - 1);
}
if (event[i] & PM_BUSEVENT_MSK) {
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
if (unit > PM_LASTUNIT)
return -1;
if (unit == PM_ISU0_ALT)
unit = PM_ISU0;
if (byte >= 4) {
if (unit != PM_LSU1)
return -1;
++unit;
byte &= 3;
}
if (busbyte[byte] && busbyte[byte] != unit)
return -1;
busbyte[byte] = unit;
unituse[unit] = 1;
}
}
/*
* Assign resources and set multiplexer selects.
*
* PM_ISU0 can go either on TTM0 or TTM1, but that's the only
* choice we have to deal with.
*/
if (unituse[PM_ISU0] &
(unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
unituse[PM_ISU0] = 0;
}
/* Set TTM[01]SEL fields. */
ttmuse = 0;
for (i = PM_FPU; i <= PM_ISU1; ++i) {
if (!unituse[i])
continue;
if (ttmuse++)
return -1;
mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
}
ttmuse = 0;
for (; i <= PM_GRS; ++i) {
if (!unituse[i])
continue;
if (ttmuse++)
return -1;
mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
}
if (ttmuse > 1)
return -1;
/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
for (byte = 0; byte < 4; ++byte) {
unit = busbyte[byte];
if (!unit)
continue;
if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
/* get ISU0 through TTM1 rather than TTM0 */
unit = PM_ISU0_ALT;
} else if (unit == PM_LSU1 + 1) {
/* select lower word of LSU1 for this byte */
mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
}
ttm = unit >> 2;
mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
}
/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
psel = event[i] & PM_PMCSEL_MSK;
isbus = event[i] & PM_BUSEVENT_MSK;
if (!pmc) {
/* Bus event or any-PMC direct event */
for (pmc = 0; pmc < 4; ++pmc) {
if (!(pmc_inuse & (1 << pmc)))
break;
}
if (pmc >= 4)
return -1;
pmc_inuse |= 1 << pmc;
} else if (pmc <= 4) {
/* Direct event */
--pmc;
if (isbus && (byte & 2) &&
(psel == 8 || psel == 0x10 || psel == 0x28))
/* add events on higher-numbered bus */
mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
} else {
/* Instructions or run cycles on PMC5/6 */
--pmc;
}
if (isbus && unit == PM_GRS) {
bit = psel & 7;
grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
mmcr1 |= (u64)grsel << grsel_shift[bit];
}
if (power5p_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
/* select alternate byte lane */
psel |= 0x10;
if (pmc <= 3)
mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
hwc[i] = pmc;
}
/* Return MMCRx values */
mmcr[0] = 0;
if (pmc_inuse & 1)
mmcr[0] = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
mmcr[0] |= MMCR0_PMCjCE;
mmcr[1] = mmcr1;
mmcr[2] = mmcra;
return 0;
}
static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
{
if (pmc <= 3)
mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
static int power5p_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 0xf,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
* Table of generalized cache-related events.
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
[C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
[C(OP_PREFETCH)] = { 0xc70e7, -1 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0xc50c3, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0xc20e4, 0x800c4 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x800c0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x230e4, 0x230e5 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
struct power_pmu power5p_pmu = {
.n_counter = 6,
.max_alternatives = MAX_ALT,
.add_fields = 0x7000000000055ull,
.test_adder = 0x3000040000000ull,
.compute_mmcr = power5p_compute_mmcr,
.get_constraint = power5p_get_constraint,
.get_alternatives = power5p_get_alternatives,
.disable_pmc = power5p_disable_pmc,
.limited_pmc_event = power5p_limited_pmc_event,
.flags = PPMU_LIMITED_PMC5_6,
.n_generic = ARRAY_SIZE(power5p_generic_events),
.generic_events = power5p_generic_events,
.cache_events = &power5p_cache_events,
};

View file

@ -0,0 +1,611 @@
/*
* Performance counter support for POWER5 (not POWER5++) processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER5 (not POWER5++)
*/
#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_BYTE_SH 12 /* Byte number of event bus to use */
#define PM_BYTE_MSK 7
#define PM_GRS_SH 8 /* Storage subsystem mux select */
#define PM_GRS_MSK 7
#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
#define PM_PMCSEL_MSK 0x7f
/* Values in PM_UNIT field */
#define PM_FPU 0
#define PM_ISU0 1
#define PM_IFU 2
#define PM_ISU1 3
#define PM_IDU 4
#define PM_ISU0_ALT 6
#define PM_GRS 7
#define PM_LSU0 8
#define PM_LSU1 0xc
#define PM_LASTUNIT 0xc
/*
* Bits in MMCR1 for POWER5
*/
#define MMCR1_TTM0SEL_SH 62
#define MMCR1_TTM1SEL_SH 60
#define MMCR1_TTM2SEL_SH 58
#define MMCR1_TTM3SEL_SH 56
#define MMCR1_TTMSEL_MSK 3
#define MMCR1_TD_CP_DBG0SEL_SH 54
#define MMCR1_TD_CP_DBG1SEL_SH 52
#define MMCR1_TD_CP_DBG2SEL_SH 50
#define MMCR1_TD_CP_DBG3SEL_SH 48
#define MMCR1_GRS_L2SEL_SH 46
#define MMCR1_GRS_L2SEL_MSK 3
#define MMCR1_GRS_L3SEL_SH 44
#define MMCR1_GRS_L3SEL_MSK 3
#define MMCR1_GRS_MCSEL_SH 41
#define MMCR1_GRS_MCSEL_MSK 7
#define MMCR1_GRS_FABSEL_SH 39
#define MMCR1_GRS_FABSEL_MSK 3
#define MMCR1_PMC1_ADDER_SEL_SH 35
#define MMCR1_PMC2_ADDER_SEL_SH 34
#define MMCR1_PMC3_ADDER_SEL_SH 33
#define MMCR1_PMC4_ADDER_SEL_SH 32
#define MMCR1_PMC1SEL_SH 25
#define MMCR1_PMC2SEL_SH 17
#define MMCR1_PMC3SEL_SH 9
#define MMCR1_PMC4SEL_SH 1
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
* T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
*
* T0 - TTM0 constraint
* 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
*
* T1 - TTM1 constraint
* 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
*
* NC - number of counters
* 51: NC error 0x0008_0000_0000_0000
* 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
*
* G0..G3 - GRS mux constraints
* 46-47: GRS_L2SEL value
* 44-45: GRS_L3SEL value
* 41-44: GRS_MCSEL value
* 39-40: GRS_FABSEL value
* Note that these match up with their bit positions in MMCR1
*
* UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
* 37: UC3 error 0x20_0000_0000
* 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
* 35: ISU0 events needed 0x08_0000_0000
* 34: IDU|GRS events needed 0x04_0000_0000
*
* PS1
* 33: PS1 error 0x2_0000_0000
* 31-32: count of events needing PMC1/2 0x1_8000_0000
*
* PS2
* 30: PS2 error 0x4000_0000
* 28-29: count of events needing PMC3/4 0x3000_0000
*
* B0
* 24-27: Byte 0 event source 0x0f00_0000
* Encoding as for the event code
*
* B1, B2, B3
* 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
*
* P1..P6
* 0-11: Count of events needing PMC1..PMC6
*/
static const int grsel_shift[8] = {
MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
};
/* Masks and values for using events from the various units */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull },
[PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull },
[PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull },
[PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull },
[PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull },
[PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull },
};
static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
int pmc, byte, unit, sh;
int bit, fmask;
u64 mask = 0, value = 0;
int grp = -1;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 6)
return -1;
sh = (pmc - 1) * 2;
mask |= 2 << sh;
value |= 1 << sh;
if (pmc <= 4)
grp = (pmc - 1) >> 1;
else if (event != 0x500009 && event != 0x600005)
return -1;
}
if (event & PM_BUSEVENT_MSK) {
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
if (unit > PM_LASTUNIT)
return -1;
if (unit == PM_ISU0_ALT)
unit = PM_ISU0;
mask |= unit_cons[unit][0];
value |= unit_cons[unit][1];
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
if (byte >= 4) {
if (unit != PM_LSU1)
return -1;
/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
++unit;
byte &= 3;
}
if (unit == PM_GRS) {
bit = event & 7;
fmask = (bit == 6)? 7: 3;
sh = grsel_shift[bit];
mask |= (u64)fmask << sh;
value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
}
/*
* Bus events on bytes 0 and 2 can be counted
* on PMC1/2; bytes 1 and 3 on PMC3/4.
*/
if (!pmc)
grp = byte & 1;
/* Set byte lane select field */
mask |= 0xfULL << (24 - 4 * byte);
value |= (u64)unit << (24 - 4 * byte);
}
if (grp == 0) {
/* increment PMC1/2 field */
mask |= 0x200000000ull;
value |= 0x080000000ull;
} else if (grp == 1) {
/* increment PMC3/4 field */
mask |= 0x40000000ull;
value |= 0x10000000ull;
}
if (pmc < 5) {
/* need a counter from PMC1-4 set */
mask |= 0x8000000000000ull;
value |= 0x1000000000000ull;
}
*maskp = mask;
*valp = value;
return 0;
}
#define MAX_ALT 3 /* at most 3 alternatives for any event */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
{ 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
{ 0x100005, 0x600005 }, /* PM_RUN_CYC */
{ 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */
{ 0x300009, 0x400009 }, /* PM_INST_DISP */
};
/*
* Scan the alternatives table for a match and return the
* index into the alternatives table if found, else -1.
*/
static int find_alternative(u64 event)
{
int i, j;
for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
if (event < event_alternatives[i][0])
break;
for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
if (event == event_alternatives[i][j])
return i;
}
return -1;
}
static const unsigned char bytedecode_alternatives[4][4] = {
/* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
/* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
/* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
/* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
};
/*
* Some direct events for decodes of event bus byte 3 have alternative
* PMCSEL values on other counters. This returns the alternative
* event code for those that do, or -1 otherwise.
*/
static s64 find_alternative_bdecode(u64 event)
{
int pmc, altpmc, pp, j;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc == 0 || pmc > 4)
return -1;
altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
pp = event & PM_PMCSEL_MSK;
for (j = 0; j < 4; ++j) {
if (bytedecode_alternatives[pmc - 1][j] == pp) {
return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
(altpmc << PM_PMC_SH) |
bytedecode_alternatives[altpmc - 1][j];
}
}
return -1;
}
static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
int i, j, nalt = 1;
s64 ae;
alt[0] = event;
nalt = 1;
i = find_alternative(event);
if (i >= 0) {
for (j = 0; j < MAX_ALT; ++j) {
ae = event_alternatives[i][j];
if (ae && ae != event)
alt[nalt++] = ae;
}
} else {
ae = find_alternative_bdecode(event);
if (ae > 0)
alt[nalt++] = ae;
}
return nalt;
}
/*
* Map of which direct events on which PMCs are marked instruction events.
* Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
* Bit 0 is set if it is marked for all PMCs.
* The 0x80 bit indicates a byte decode PMCSEL value.
*/
static unsigned char direct_event_is_marked[0x28] = {
0, /* 00 */
0x1f, /* 01 PM_IOPS_CMPL */
0x2, /* 02 PM_MRK_GRP_DISP */
0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
0, /* 04 */
0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
0x80, /* 06 */
0x80, /* 07 */
0, 0, 0,/* 08 - 0a */
0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
0, /* 0c */
0x80, /* 0d */
0x80, /* 0e */
0, /* 0f */
0, /* 10 */
0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
0, /* 12 */
0x10, /* 13 PM_MRK_GRP_CMPL */
0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
0x2, /* 15 PM_MRK_GRP_ISSUED */
0x80, /* 16 */
0x80, /* 17 */
0, 0, 0, 0, 0,
0x80, /* 1d */
0x80, /* 1e */
0, /* 1f */
0x80, /* 20 */
0x80, /* 21 */
0x80, /* 22 */
0x80, /* 23 */
0x80, /* 24 */
0x80, /* 25 */
0x80, /* 26 */
0x80, /* 27 */
};
/*
* Returns 1 if event counts things relating to marked instructions
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
*/
static int power5_marked_instr_event(u64 event)
{
int pmc, psel;
int bit, byte, unit;
u32 mask;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
psel = event & PM_PMCSEL_MSK;
if (pmc >= 5)
return 0;
bit = -1;
if (psel < sizeof(direct_event_is_marked)) {
if (direct_event_is_marked[psel] & (1 << pmc))
return 1;
if (direct_event_is_marked[psel] & 0x80)
bit = 4;
else if (psel == 0x08)
bit = pmc - 1;
else if (psel == 0x10)
bit = 4 - pmc;
else if (psel == 0x1b && (pmc == 1 || pmc == 3))
bit = 4;
} else if ((psel & 0x58) == 0x40)
bit = psel & 7;
if (!(event & PM_BUSEVENT_MSK))
return 0;
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
if (unit == PM_LSU0) {
/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
mask = 0x5dff00;
} else if (unit == PM_LSU1 && byte >= 4) {
byte -= 4;
/* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
mask = 0x5f00c0aa;
} else
return 0;
return (mask >> (byte * 8 + bit)) & 1;
}
static int power5_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[])
{
u64 mmcr1 = 0;
u64 mmcra = 0;
unsigned int pmc, unit, byte, psel;
unsigned int ttm, grp;
int i, isbus, bit, grsel;
unsigned int pmc_inuse = 0;
unsigned int pmc_grp_use[2];
unsigned char busbyte[4];
unsigned char unituse[16];
int ttmuse;
if (n_ev > 6)
return -1;
/* First pass to count resource use */
pmc_grp_use[0] = pmc_grp_use[1] = 0;
memset(busbyte, 0, sizeof(busbyte));
memset(unituse, 0, sizeof(unituse));
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 6)
return -1;
if (pmc_inuse & (1 << (pmc - 1)))
return -1;
pmc_inuse |= 1 << (pmc - 1);
/* count 1/2 vs 3/4 use */
if (pmc <= 4)
++pmc_grp_use[(pmc - 1) >> 1];
}
if (event[i] & PM_BUSEVENT_MSK) {
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
if (unit > PM_LASTUNIT)
return -1;
if (unit == PM_ISU0_ALT)
unit = PM_ISU0;
if (byte >= 4) {
if (unit != PM_LSU1)
return -1;
++unit;
byte &= 3;
}
if (!pmc)
++pmc_grp_use[byte & 1];
if (busbyte[byte] && busbyte[byte] != unit)
return -1;
busbyte[byte] = unit;
unituse[unit] = 1;
}
}
if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
return -1;
/*
* Assign resources and set multiplexer selects.
*
* PM_ISU0 can go either on TTM0 or TTM1, but that's the only
* choice we have to deal with.
*/
if (unituse[PM_ISU0] &
(unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
unituse[PM_ISU0] = 0;
}
/* Set TTM[01]SEL fields. */
ttmuse = 0;
for (i = PM_FPU; i <= PM_ISU1; ++i) {
if (!unituse[i])
continue;
if (ttmuse++)
return -1;
mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
}
ttmuse = 0;
for (; i <= PM_GRS; ++i) {
if (!unituse[i])
continue;
if (ttmuse++)
return -1;
mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
}
if (ttmuse > 1)
return -1;
/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
for (byte = 0; byte < 4; ++byte) {
unit = busbyte[byte];
if (!unit)
continue;
if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
/* get ISU0 through TTM1 rather than TTM0 */
unit = PM_ISU0_ALT;
} else if (unit == PM_LSU1 + 1) {
/* select lower word of LSU1 for this byte */
mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
}
ttm = unit >> 2;
mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
}
/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
psel = event[i] & PM_PMCSEL_MSK;
isbus = event[i] & PM_BUSEVENT_MSK;
if (!pmc) {
/* Bus event or any-PMC direct event */
for (pmc = 0; pmc < 4; ++pmc) {
if (pmc_inuse & (1 << pmc))
continue;
grp = (pmc >> 1) & 1;
if (isbus) {
if (grp == (byte & 1))
break;
} else if (pmc_grp_use[grp] < 2) {
++pmc_grp_use[grp];
break;
}
}
pmc_inuse |= 1 << pmc;
} else if (pmc <= 4) {
/* Direct event */
--pmc;
if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
/* add events on higher-numbered bus */
mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
} else {
/* Instructions or run cycles on PMC5/6 */
--pmc;
}
if (isbus && unit == PM_GRS) {
bit = psel & 7;
grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
mmcr1 |= (u64)grsel << grsel_shift[bit];
}
if (power5_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
if (pmc <= 3)
mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
hwc[i] = pmc;
}
/* Return MMCRx values */
mmcr[0] = 0;
if (pmc_inuse & 1)
mmcr[0] = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
mmcr[0] |= MMCR0_PMCjCE;
mmcr[1] = mmcr1;
mmcr[2] = mmcra;
return 0;
}
static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
{
if (pmc <= 3)
mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
static int power5_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 0xf,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
* Table of generalized cache-related events.
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x4c1090, 0x3c1088 },
[C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
[C(OP_PREFETCH)] = { 0xc70e7, 0 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x3c309b },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0xc50c3, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x2c4090, 0x800c4 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x800c0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x230e4, 0x230e5 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
struct power_pmu power5_pmu = {
.n_counter = 6,
.max_alternatives = MAX_ALT,
.add_fields = 0x7000090000555ull,
.test_adder = 0x3000490000000ull,
.compute_mmcr = power5_compute_mmcr,
.get_constraint = power5_get_constraint,
.get_alternatives = power5_get_alternatives,
.disable_pmc = power5_disable_pmc,
.n_generic = ARRAY_SIZE(power5_generic_events),
.generic_events = power5_generic_events,
.cache_events = &power5_cache_events,
};

View file

@ -0,0 +1,532 @@
/*
* Performance counter support for POWER6 processors.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER6
*/
#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0x7
#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH 16 /* Unit event comes (TTMxSEL encoding) */
#define PM_UNIT_MSK 0xf
#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH)
#define PM_LLAV 0x8000 /* Load lookahead match value */
#define PM_LLA 0x4000 /* Load lookahead match enable */
#define PM_BYTE_SH 12 /* Byte of event bus to use */
#define PM_BYTE_MSK 3
#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */
#define PM_SUBUNIT_MSK 7
#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */
#define PM_BUSEVENT_MSK 0xf3700
/*
* Bits in MMCR1 for POWER6
*/
#define MMCR1_TTM0SEL_SH 60
#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4)
#define MMCR1_TTMSEL_MSK 0xf
#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
#define MMCR1_NESTSEL_SH 45
#define MMCR1_NESTSEL_MSK 0x7
#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
#define MMCR1_PMC1_LLA ((u64)1 << 44)
#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39)
#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35)
#define MMCR1_PMC1SEL_SH 24
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0xff
/*
* Map of which direct events on which PMCs are marked instruction events.
* Indexed by PMCSEL value >> 1.
* Bottom 4 bits are a map of which PMCs are interesting,
* top 4 bits say what sort of event:
* 0 = direct marked event,
* 1 = byte decode event,
* 4 = add/and event (PMC1 -> bits 0 & 4),
* 5 = add/and event (PMC1 -> bits 1 & 5),
* 6 = add/and event (PMC1 -> bits 2 & 6),
* 7 = add/and event (PMC1 -> bits 3 & 7).
*/
static unsigned char direct_event_is_marked[0x60 >> 1] = {
0, /* 00 */
0, /* 02 */
0, /* 04 */
0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
0x04, /* 08 PM_MRK_DFU_FIN */
0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
0, /* 0c */
0, /* 0e */
0x02, /* 10 PM_MRK_INST_DISP */
0x08, /* 12 PM_MRK_LSU_DERAT_MISS */
0, /* 14 */
0, /* 16 */
0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
0x01, /* 1c PM_MRK_INST_ISSUED */
0, /* 1e */
0, /* 20 */
0, /* 22 */
0, /* 24 */
0, /* 26 */
0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
0, /* 2a */
0, /* 2c */
0, /* 2e */
0x4f, /* 30 */
0x7f, /* 32 */
0x4f, /* 34 */
0x5f, /* 36 */
0x6f, /* 38 */
0x4f, /* 3a */
0, /* 3c */
0x08, /* 3e PM_MRK_INST_TIMEO */
0x1f, /* 40 */
0x1f, /* 42 */
0x1f, /* 44 */
0x1f, /* 46 */
0x1f, /* 48 */
0x1f, /* 4a */
0x1f, /* 4c */
0x1f, /* 4e */
0, /* 50 */
0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
0x02, /* 56 PM_MRK_LD_MISS_L1 */
0, /* 58 */
0, /* 5a */
0, /* 5c */
0, /* 5e */
};
/*
* Masks showing for each unit which bits are marked events.
* These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
*/
static u32 marked_bus_events[16] = {
0x01000000, /* direct events set 1: byte 3 bit 0 */
0x00010000, /* direct events set 2: byte 2 bit 0 */
0, 0, 0, 0, /* IDU, IFU, nest: nothing */
0x00000088, /* VMX set 1: byte 0 bits 3, 7 */
0x000000c0, /* VMX set 2: byte 0 bits 4-7 */
0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */
0, /* LSU set 3 */
0x00000010, /* VMX set 3: byte 0 bit 4 */
0, /* BFP set 1 */
0x00000022, /* BFP set 2: byte 0 bits 1, 5 */
0, 0
};
/*
* Returns 1 if event counts things relating to marked instructions
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
*/
static int power6_marked_instr_event(u64 event)
{
int pmc, psel, ptype;
int bit, byte, unit;
u32 mask;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */
if (pmc >= 5)
return 0;
bit = -1;
if (psel < sizeof(direct_event_is_marked)) {
ptype = direct_event_is_marked[psel];
if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
return 0;
ptype >>= 4;
if (ptype == 0)
return 1;
if (ptype == 1)
bit = 0;
else
bit = ptype ^ (pmc - 1);
} else if ((psel & 0x48) == 0x40)
bit = psel & 7;
if (!(event & PM_BUSEVENT_MSK) || bit == -1)
return 0;
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
mask = marked_bus_events[unit];
return (mask >> (byte * 8 + bit)) & 1;
}
/*
* Assign PMC numbers and compute MMCR1 value for a set of events
*/
static int p6_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[])
{
u64 mmcr1 = 0;
u64 mmcra = 0;
int i;
unsigned int pmc, ev, b, u, s, psel;
unsigned int ttmset = 0;
unsigned int pmc_inuse = 0;
if (n_ev > 6)
return -1;
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc_inuse & (1 << (pmc - 1)))
return -1; /* collision! */
pmc_inuse |= 1 << (pmc - 1);
}
}
for (i = 0; i < n_ev; ++i) {
ev = event[i];
pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
--pmc;
} else {
/* can go on any PMC; find a free one */
for (pmc = 0; pmc < 4; ++pmc)
if (!(pmc_inuse & (1 << pmc)))
break;
if (pmc >= 4)
return -1;
pmc_inuse |= 1 << pmc;
}
hwc[i] = pmc;
psel = ev & PM_PMCSEL_MSK;
if (ev & PM_BUSEVENT_MSK) {
/* this event uses the event bus */
b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
/* check for conflict on this byte of event bus */
if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
return -1;
mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
ttmset |= 1 << b;
if (u == 5) {
/* Nest events have a further mux */
s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
if ((ttmset & 0x10) &&
MMCR1_NESTSEL(mmcr1) != s)
return -1;
ttmset |= 0x10;
mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
}
if (0x30 <= psel && psel <= 0x3d) {
/* these need the PMCx_ADDR_SEL bits */
if (b >= 2)
mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
}
/* bus select values are different for PMC3/4 */
if (pmc >= 2 && (psel & 0x90) == 0x80)
psel ^= 0x20;
}
if (ev & PM_LLA) {
mmcr1 |= MMCR1_PMC1_LLA >> pmc;
if (ev & PM_LLAV)
mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
}
if (power6_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
if (pmc < 4)
mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
}
mmcr[0] = 0;
if (pmc_inuse & 1)
mmcr[0] = MMCR0_PMC1CE;
if (pmc_inuse & 0xe)
mmcr[0] |= MMCR0_PMCjCE;
mmcr[1] = mmcr1;
mmcr[2] = mmcra;
return 0;
}
/*
* Layout of constraint bits:
*
* 0-1 add field: number of uses of PMC1 (max 1)
* 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
* 12-15 add field: number of uses of PMC1-4 (max 4)
* 16-19 select field: unit on byte 0 of event bus
* 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
* 32-34 select field: nest (subunit) event selector
*/
static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
int pmc, byte, sh, subunit;
u64 mask = 0, value = 0;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
return -1;
sh = (pmc - 1) * 2;
mask |= 2 << sh;
value |= 1 << sh;
}
if (event & PM_BUSEVENT_MSK) {
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
sh = byte * 4 + (16 - PM_UNIT_SH);
mask |= PM_UNIT_MSKS << sh;
value |= (u64)(event & PM_UNIT_MSKS) << sh;
if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
mask |= (u64)PM_SUBUNIT_MSK << 32;
value |= (u64)subunit << 32;
}
}
if (pmc <= 4) {
mask |= 0x8000; /* add field for count of PMC1-4 uses */
value |= 0x1000;
}
*maskp = mask;
*valp = value;
return 0;
}
static int p6_limited_pmc_event(u64 event)
{
int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
return pmc == 5 || pmc == 6;
}
#define MAX_ALT 4 /* at most 4 alternatives for any event */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
{ 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
{ 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */
{ 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
{ 0x10000e, 0x400010 }, /* PM_PURR */
{ 0x100010, 0x4000f8 }, /* PM_FLUSH */
{ 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */
{ 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */
{ 0x100054, 0x2000f0 }, /* PM_ST_FIN */
{ 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */
{ 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */
{ 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */
{ 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */
{ 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */
{ 0x200012, 0x300012 }, /* PM_INST_DISP */
{ 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */
{ 0x2000f8, 0x300010 }, /* PM_EXT_INT */
{ 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */
{ 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */
{ 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */
{ 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */
{ 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
};
/*
* This could be made more efficient with a binary search on
* a presorted list, if necessary
*/
static int find_alternatives_list(u64 event)
{
int i, j;
unsigned int alt;
for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
if (event < event_alternatives[i][0])
return -1;
for (j = 0; j < MAX_ALT; ++j) {
alt = event_alternatives[i][j];
if (!alt || event < alt)
break;
if (event == alt)
return i;
}
}
return -1;
}
static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
int i, j, nlim;
unsigned int psel, pmc;
unsigned int nalt = 1;
u64 aevent;
alt[0] = event;
nlim = p6_limited_pmc_event(event);
/* check the alternatives table */
i = find_alternatives_list(event);
if (i >= 0) {
/* copy out alternatives from list */
for (j = 0; j < MAX_ALT; ++j) {
aevent = event_alternatives[i][j];
if (!aevent)
break;
if (aevent != event)
alt[nalt++] = aevent;
nlim += p6_limited_pmc_event(aevent);
}
} else {
/* Check for alternative ways of computing sum events */
/* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc && (psel == 0x32 || psel == 0x34))
alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
((5 - pmc) << PM_PMC_SH);
/* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
if (pmc && (psel == 0x38 || psel == 0x3a))
alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
}
if (flags & PPMU_ONLY_COUNT_RUN) {
/*
* We're only counting in RUN state,
* so PM_CYC is equivalent to PM_RUN_CYC,
* PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
* This doesn't include alternatives that don't provide
* any extra flexibility in assigning PMCs (e.g.
* 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
* Note that even with these additional alternatives
* we never end up with more than 4 alternatives for any event.
*/
j = nalt;
for (i = 0; i < nalt; ++i) {
switch (alt[i]) {
case 0x1e: /* PM_CYC */
alt[j++] = 0x600005; /* PM_RUN_CYC */
++nlim;
break;
case 0x10000a: /* PM_RUN_CYC */
alt[j++] = 0x1e; /* PM_CYC */
break;
case 2: /* PM_INST_CMPL */
alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
++nlim;
break;
case 0x500009: /* PM_RUN_INST_CMPL */
alt[j++] = 2; /* PM_INST_CMPL */
break;
case 0x10000e: /* PM_PURR */
alt[j++] = 0x4000f4; /* PM_RUN_PURR */
break;
case 0x4000f4: /* PM_RUN_PURR */
alt[j++] = 0x10000e; /* PM_PURR */
break;
}
}
nalt = j;
}
if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
/* remove the limited PMC events */
j = 0;
for (i = 0; i < nalt; ++i) {
if (!p6_limited_pmc_event(alt[i])) {
alt[j] = alt[i];
++j;
}
}
nalt = j;
} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
/* remove all but the limited PMC events */
j = 0;
for (i = 0; i < nalt; ++i) {
if (p6_limited_pmc_event(alt[i])) {
alt[j] = alt[i];
++j;
}
}
nalt = j;
}
return nalt;
}
static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
{
/* Set PMCxSEL to 0 to disable PMCx */
if (pmc <= 3)
mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
}
static int power6_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
[PERF_COUNT_HW_INSTRUCTIONS] = 2,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
* Table of generalized cache-related events.
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
* The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
*/
static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x80082, 0x80080 },
[C(OP_WRITE)] = { 0x80086, 0x80088 },
[C(OP_PREFETCH)] = { 0x810a4, 0 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x100056 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0x4008c, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x150730, 0x250532 },
[C(OP_WRITE)] = { 0x250432, 0x150432 },
[C(OP_PREFETCH)] = { 0x810a6, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x20000e },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x420ce },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x430e6, 0x400052 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
struct power_pmu power6_pmu = {
.n_counter = 6,
.max_alternatives = MAX_ALT,
.add_fields = 0x1555,
.test_adder = 0x3000,
.compute_mmcr = p6_compute_mmcr,
.get_constraint = p6_get_constraint,
.get_alternatives = p6_get_alternatives,
.disable_pmc = p6_disable_pmc,
.limited_pmc_event = p6_limited_pmc_event,
.flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
.n_generic = ARRAY_SIZE(power6_generic_events),
.generic_events = power6_generic_events,
.cache_events = &power6_cache_events,
};

View file

@ -0,0 +1,357 @@
/*
* Performance counter support for POWER7 processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for POWER7
*/
#define PM_PMC_SH 16 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_COMBINE_SH 11 /* Combined event bit */
#define PM_COMBINE_MSK 1
#define PM_COMBINE_MSKS 0x800
#define PM_L2SEL_SH 8 /* L2 event select */
#define PM_L2SEL_MSK 7
#define PM_PMCSEL_MSK 0xff
/*
* Bits in MMCR1 for POWER7
*/
#define MMCR1_TTM0SEL_SH 60
#define MMCR1_TTM1SEL_SH 56
#define MMCR1_TTM2SEL_SH 52
#define MMCR1_TTM3SEL_SH 48
#define MMCR1_TTMSEL_MSK 0xf
#define MMCR1_L2SEL_SH 45
#define MMCR1_L2SEL_MSK 7
#define MMCR1_PMC1_COMBINE_SH 35
#define MMCR1_PMC2_COMBINE_SH 34
#define MMCR1_PMC3_COMBINE_SH 33
#define MMCR1_PMC4_COMBINE_SH 32
#define MMCR1_PMC1SEL_SH 24
#define MMCR1_PMC2SEL_SH 16
#define MMCR1_PMC3SEL_SH 8
#define MMCR1_PMC4SEL_SH 0
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0xff
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* [ ><><><><><><>
* NC P6P5P4P3P2P1
*
* NC - number of counters
* 15: NC error 0x8000
* 12-14: number of events needing PMC1-4 0x7000
*
* P6
* 11: P6 error 0x800
* 10-11: Count of events needing PMC6
*
* P1..P5
* 0-9: Count of events needing PMC1..PMC5
*/
static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
int pmc, sh;
u64 mask = 0, value = 0;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 6)
return -1;
sh = (pmc - 1) * 2;
mask |= 2 << sh;
value |= 1 << sh;
if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4))
return -1;
}
if (pmc < 5) {
/* need a counter from PMC1-4 set */
mask |= 0x8000;
value |= 0x1000;
}
*maskp = mask;
*valp = value;
return 0;
}
#define MAX_ALT 2 /* at most 2 alternatives for any event */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x200f2, 0x300f2 }, /* PM_INST_DISP */
{ 0x200f4, 0x600f4 }, /* PM_RUN_CYC */
{ 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */
};
/*
* Scan the alternatives table for a match and return the
* index into the alternatives table if found, else -1.
*/
static int find_alternative(u64 event)
{
int i, j;
for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
if (event < event_alternatives[i][0])
break;
for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
if (event == event_alternatives[i][j])
return i;
}
return -1;
}
static s64 find_alternative_decode(u64 event)
{
int pmc, psel;
/* this only handles the 4x decode events */
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
psel = event & PM_PMCSEL_MSK;
if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40)
return event - (1 << PM_PMC_SH) + 8;
if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48)
return event + (1 << PM_PMC_SH) - 8;
return -1;
}
static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
int i, j, nalt = 1;
s64 ae;
alt[0] = event;
nalt = 1;
i = find_alternative(event);
if (i >= 0) {
for (j = 0; j < MAX_ALT; ++j) {
ae = event_alternatives[i][j];
if (ae && ae != event)
alt[nalt++] = ae;
}
} else {
ae = find_alternative_decode(event);
if (ae > 0)
alt[nalt++] = ae;
}
if (flags & PPMU_ONLY_COUNT_RUN) {
/*
* We're only counting in RUN state,
* so PM_CYC is equivalent to PM_RUN_CYC
* and PM_INST_CMPL === PM_RUN_INST_CMPL.
* This doesn't include alternatives that don't provide
* any extra flexibility in assigning PMCs.
*/
j = nalt;
for (i = 0; i < nalt; ++i) {
switch (alt[i]) {
case 0x1e: /* PM_CYC */
alt[j++] = 0x600f4; /* PM_RUN_CYC */
break;
case 0x600f4: /* PM_RUN_CYC */
alt[j++] = 0x1e;
break;
case 0x2: /* PM_PPC_CMPL */
alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */
break;
case 0x500fa: /* PM_RUN_INST_CMPL */
alt[j++] = 0x2; /* PM_PPC_CMPL */
break;
}
}
nalt = j;
}
return nalt;
}
/*
* Returns 1 if event counts things relating to marked instructions
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
*/
static int power7_marked_instr_event(u64 event)
{
int pmc, psel;
int unit;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */
if (pmc >= 5)
return 0;
switch (psel >> 4) {
case 2:
return pmc == 2 || pmc == 4;
case 3:
if (psel == 0x3c)
return pmc == 1;
if (psel == 0x3e)
return pmc != 2;
return 1;
case 4:
case 5:
return unit == 0xd;
case 6:
if (psel == 0x64)
return pmc >= 3;
case 8:
return unit == 0xd;
}
return 0;
}
static int power7_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[])
{
u64 mmcr1 = 0;
u64 mmcra = 0;
unsigned int pmc, unit, combine, l2sel, psel;
unsigned int pmc_inuse = 0;
int i;
/* First pass to count resource use */
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 6)
return -1;
if (pmc_inuse & (1 << (pmc - 1)))
return -1;
pmc_inuse |= 1 << (pmc - 1);
}
}
/* Second pass: assign PMCs, set all MMCR1 fields */
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK;
l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK;
psel = event[i] & PM_PMCSEL_MSK;
if (!pmc) {
/* Bus event or any-PMC direct event */
for (pmc = 0; pmc < 4; ++pmc) {
if (!(pmc_inuse & (1 << pmc)))
break;
}
if (pmc >= 4)
return -1;
pmc_inuse |= 1 << pmc;
} else {
/* Direct or decoded event */
--pmc;
}
if (pmc <= 3) {
mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc);
mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc);
mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
if (unit == 6) /* L2 events */
mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH;
}
if (power7_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
hwc[i] = pmc;
}
/* Return MMCRx values */
mmcr[0] = 0;
if (pmc_inuse & 1)
mmcr[0] = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
mmcr[0] |= MMCR0_PMCjCE;
mmcr[1] = mmcr1;
mmcr[2] = mmcra;
return 0;
}
static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
{
if (pmc <= 3)
mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc));
}
static int power7_generic_events[] = {
[PERF_COUNT_CPU_CYCLES] = 0x1e,
[PERF_COUNT_INSTRUCTIONS] = 2,
[PERF_COUNT_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */
[PERF_COUNT_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
[PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
* Table of generalized cache-related events.
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x400f0, 0xc880 },
[C(OP_WRITE)] = { 0, 0x300f0 },
[C(OP_PREFETCH)] = { 0xd8b8, 0 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x200fc },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0x408a, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x6080, 0x6084 },
[C(OP_WRITE)] = { 0x6082, 0x6086 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x300fc },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x400fc },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x10068, 0x400f6 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
struct power_pmu power7_pmu = {
.n_counter = 6,
.max_alternatives = MAX_ALT + 1,
.add_fields = 0x1555ull,
.test_adder = 0x3000ull,
.compute_mmcr = power7_compute_mmcr,
.get_constraint = power7_get_constraint,
.get_alternatives = power7_get_alternatives,
.disable_pmc = power7_disable_pmc,
.n_generic = ARRAY_SIZE(power7_generic_events),
.generic_events = power7_generic_events,
.cache_events = &power7_cache_events,
};

View file

@ -0,0 +1,482 @@
/*
* Performance counter support for PPC970-family processors.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
/*
* Bits in event code for PPC970
*/
#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
#define PM_PMC_MSK 0xf
#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK 0xf
#define PM_SPCSEL_SH 6
#define PM_SPCSEL_MSK 3
#define PM_BYTE_SH 4 /* Byte number of event bus to use */
#define PM_BYTE_MSK 3
#define PM_PMCSEL_MSK 0xf
/* Values in PM_UNIT field */
#define PM_NONE 0
#define PM_FPU 1
#define PM_VPU 2
#define PM_ISU 3
#define PM_IFU 4
#define PM_IDU 5
#define PM_STS 6
#define PM_LSU0 7
#define PM_LSU1U 8
#define PM_LSU1L 9
#define PM_LASTUNIT 9
/*
* Bits in MMCR0 for PPC970
*/
#define MMCR0_PMC1SEL_SH 8
#define MMCR0_PMC2SEL_SH 1
#define MMCR_PMCSEL_MSK 0x1f
/*
* Bits in MMCR1 for PPC970
*/
#define MMCR1_TTM0SEL_SH 62
#define MMCR1_TTM1SEL_SH 59
#define MMCR1_TTM3SEL_SH 53
#define MMCR1_TTMSEL_MSK 3
#define MMCR1_TD_CP_DBG0SEL_SH 50
#define MMCR1_TD_CP_DBG1SEL_SH 48
#define MMCR1_TD_CP_DBG2SEL_SH 46
#define MMCR1_TD_CP_DBG3SEL_SH 44
#define MMCR1_PMC1_ADDER_SEL_SH 39
#define MMCR1_PMC2_ADDER_SEL_SH 38
#define MMCR1_PMC6_ADDER_SEL_SH 37
#define MMCR1_PMC5_ADDER_SEL_SH 36
#define MMCR1_PMC8_ADDER_SEL_SH 35
#define MMCR1_PMC7_ADDER_SEL_SH 34
#define MMCR1_PMC3_ADDER_SEL_SH 33
#define MMCR1_PMC4_ADDER_SEL_SH 32
#define MMCR1_PMC3SEL_SH 27
#define MMCR1_PMC4SEL_SH 22
#define MMCR1_PMC5SEL_SH 17
#define MMCR1_PMC6SEL_SH 12
#define MMCR1_PMC7SEL_SH 7
#define MMCR1_PMC8SEL_SH 2
static short mmcr1_adder_bits[8] = {
MMCR1_PMC1_ADDER_SEL_SH,
MMCR1_PMC2_ADDER_SEL_SH,
MMCR1_PMC3_ADDER_SEL_SH,
MMCR1_PMC4_ADDER_SEL_SH,
MMCR1_PMC5_ADDER_SEL_SH,
MMCR1_PMC6_ADDER_SEL_SH,
MMCR1_PMC7_ADDER_SEL_SH,
MMCR1_PMC8_ADDER_SEL_SH
};
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
* <><><>[ >[ >[ >< >< >< >< ><><><><><><><><>
* SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
*
* SP - SPCSEL constraint
* 48-49: SPCSEL value 0x3_0000_0000_0000
*
* T0 - TTM0 constraint
* 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
*
* T1 - TTM1 constraint
* 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
*
* UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
* 43: UC3 error 0x0800_0000_0000
* 42: FPU|IFU|VPU events needed 0x0400_0000_0000
* 41: ISU events needed 0x0200_0000_0000
* 40: IDU|STS events needed 0x0100_0000_0000
*
* PS1
* 39: PS1 error 0x0080_0000_0000
* 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
*
* PS2
* 35: PS2 error 0x0008_0000_0000
* 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
*
* B0
* 28-31: Byte 0 event source 0xf000_0000
* Encoding as for the event code
*
* B1, B2, B3
* 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
*
* P1
* 15: P1 error 0x8000
* 14-15: Count of events needing PMC1
*
* P2..P8
* 0-13: Count of events needing PMC2..PMC8
*/
static unsigned char direct_marked_event[8] = {
(1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
(1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
(1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
(1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
(1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
(1<<3) | (1<<4) | (1<<5),
/* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
(1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
(1<<4) /* PMC8: PM_MRK_LSU_FIN */
};
/*
* Returns 1 if event counts things relating to marked instructions
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
*/
static int p970_marked_instr_event(u64 event)
{
int pmc, psel, unit, byte, bit;
unsigned int mask;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
psel = event & PM_PMCSEL_MSK;
if (pmc) {
if (direct_marked_event[pmc - 1] & (1 << psel))
return 1;
if (psel == 0) /* add events */
bit = (pmc <= 4)? pmc - 1: 8 - pmc;
else if (psel == 7 || psel == 13) /* decode events */
bit = 4;
else
return 0;
} else
bit = psel;
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
mask = 0;
switch (unit) {
case PM_VPU:
mask = 0x4c; /* byte 0 bits 2,3,6 */
case PM_LSU0:
/* byte 2 bits 0,2,3,4,6; all of byte 1 */
mask = 0x085dff00;
case PM_LSU1L:
mask = 0x50 << 24; /* byte 3 bits 4,6 */
break;
}
return (mask >> (byte * 8 + bit)) & 1;
}
/* Masks and values for using events from the various units */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
[PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
[PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
[PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
[PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
[PM_STS] = { 0x380000000000ull, 0x310000000000ull },
};
static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
{
int pmc, byte, unit, sh, spcsel;
u64 mask = 0, value = 0;
int grp = -1;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 8)
return -1;
sh = (pmc - 1) * 2;
mask |= 2 << sh;
value |= 1 << sh;
grp = ((pmc - 1) >> 1) & 1;
}
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
if (unit) {
if (unit > PM_LASTUNIT)
return -1;
mask |= unit_cons[unit][0];
value |= unit_cons[unit][1];
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
/*
* Bus events on bytes 0 and 2 can be counted
* on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
*/
if (!pmc)
grp = byte & 1;
/* Set byte lane select field */
mask |= 0xfULL << (28 - 4 * byte);
value |= (u64)unit << (28 - 4 * byte);
}
if (grp == 0) {
/* increment PMC1/2/5/6 field */
mask |= 0x8000000000ull;
value |= 0x1000000000ull;
} else if (grp == 1) {
/* increment PMC3/4/7/8 field */
mask |= 0x800000000ull;
value |= 0x100000000ull;
}
spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
if (spcsel) {
mask |= 3ull << 48;
value |= (u64)spcsel << 48;
}
*maskp = mask;
*valp = value;
return 0;
}
static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
alt[0] = event;
/* 2 alternatives for LSU empty */
if (event == 0x2002 || event == 0x3002) {
alt[1] = event ^ 0x1000;
return 2;
}
return 1;
}
static int p970_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], u64 mmcr[])
{
u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
unsigned int pmc, unit, byte, psel;
unsigned int ttm, grp;
unsigned int pmc_inuse = 0;
unsigned int pmc_grp_use[2];
unsigned char busbyte[4];
unsigned char unituse[16];
unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
unsigned char ttmuse[2];
unsigned char pmcsel[8];
int i;
int spcsel;
if (n_ev > 8)
return -1;
/* First pass to count resource use */
pmc_grp_use[0] = pmc_grp_use[1] = 0;
memset(busbyte, 0, sizeof(busbyte));
memset(unituse, 0, sizeof(unituse));
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc_inuse & (1 << (pmc - 1)))
return -1;
pmc_inuse |= 1 << (pmc - 1);
/* count 1/2/5/6 vs 3/4/7/8 use */
++pmc_grp_use[((pmc - 1) >> 1) & 1];
}
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
if (unit) {
if (unit > PM_LASTUNIT)
return -1;
if (!pmc)
++pmc_grp_use[byte & 1];
if (busbyte[byte] && busbyte[byte] != unit)
return -1;
busbyte[byte] = unit;
unituse[unit] = 1;
}
}
if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
return -1;
/*
* Assign resources and set multiplexer selects.
*
* PM_ISU can go either on TTM0 or TTM1, but that's the only
* choice we have to deal with.
*/
if (unituse[PM_ISU] &
(unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */
/* Set TTM[01]SEL fields. */
ttmuse[0] = ttmuse[1] = 0;
for (i = PM_FPU; i <= PM_STS; ++i) {
if (!unituse[i])
continue;
ttm = unitmap[i];
++ttmuse[(ttm >> 2) & 1];
mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
}
/* Check only one unit per TTMx */
if (ttmuse[0] > 1 || ttmuse[1] > 1)
return -1;
/* Set byte lane select fields and TTM3SEL. */
for (byte = 0; byte < 4; ++byte) {
unit = busbyte[byte];
if (!unit)
continue;
if (unit <= PM_STS)
ttm = (unitmap[unit] >> 2) & 1;
else if (unit == PM_LSU0)
ttm = 2;
else {
ttm = 3;
if (unit == PM_LSU1L && byte >= 2)
mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
}
mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
}
/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
psel = event[i] & PM_PMCSEL_MSK;
if (!pmc) {
/* Bus event or any-PMC direct event */
if (unit)
psel |= 0x10 | ((byte & 2) << 2);
else
psel |= 8;
for (pmc = 0; pmc < 8; ++pmc) {
if (pmc_inuse & (1 << pmc))
continue;
grp = (pmc >> 1) & 1;
if (unit) {
if (grp == (byte & 1))
break;
} else if (pmc_grp_use[grp] < 4) {
++pmc_grp_use[grp];
break;
}
}
pmc_inuse |= 1 << pmc;
} else {
/* Direct event */
--pmc;
if (psel == 0 && (byte & 2))
/* add events on higher-numbered bus */
mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
}
pmcsel[pmc] = psel;
hwc[i] = pmc;
spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
mmcr1 |= spcsel;
if (p970_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
}
for (pmc = 0; pmc < 2; ++pmc)
mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
for (; pmc < 8; ++pmc)
mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
if (pmc_inuse & 1)
mmcr0 |= MMCR0_PMC1CE;
if (pmc_inuse & 0xfe)
mmcr0 |= MMCR0_PMCjCE;
mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
/* Return MMCRx values */
mmcr[0] = mmcr0;
mmcr[1] = mmcr1;
mmcr[2] = mmcra;
return 0;
}
static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
{
int shift, i;
if (pmc <= 1) {
shift = MMCR0_PMC1SEL_SH - 7 * pmc;
i = 0;
} else {
shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
i = 1;
}
/*
* Setting the PMCxSEL field to 0x08 disables PMC x.
*/
mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
}
static int ppc970_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 7,
[PERF_COUNT_HW_INSTRUCTIONS] = 1,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
[PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
[PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
/*
* Table of generalized cache-related events.
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x8810, 0x3810 },
[C(OP_WRITE)] = { 0x7810, 0x813 },
[C(OP_PREFETCH)] = { 0x731, 0 },
},
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0x733, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x704 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x700 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x431, 0x327 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
struct power_pmu ppc970_pmu = {
.n_counter = 8,
.max_alternatives = 2,
.add_fields = 0x001100005555ull,
.test_adder = 0x013300000000ull,
.compute_mmcr = p970_compute_mmcr,
.get_constraint = p970_get_constraint,
.get_alternatives = p970_get_alternatives,
.disable_pmc = p970_disable_pmc,
.n_generic = ARRAY_SIZE(ppc970_generic_events),
.generic_events = ppc970_generic_events,
.cache_events = &ppc970_cache_events,
};

View file

@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/perf_counter.h>
#include <asm/firmware.h>
#include <asm/page.h>
@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
die("Weird page fault", regs, SIGSEGV);
}
perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@ -309,6 +312,8 @@ good_area:
}
if (ret & VM_FAULT_MAJOR) {
current->maj_flt++;
perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
regs, address);
#ifdef CONFIG_PPC_SMLPAR
if (firmware_has_feature(FW_FEATURE_CMO)) {
preempt_disable();
@ -316,8 +321,11 @@ good_area:
preempt_enable();
}
#endif
} else
} else {
current->min_flt++;
perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
up_read(&mm->mmap_sem);
return 0;

View file

@ -1,6 +1,7 @@
config PPC64
bool "64-bit kernel"
default n
select HAVE_PERF_COUNTERS
help
This option selects whether a 32-bit or a 64-bit kernel
will be built.

View file

@ -739,6 +739,7 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
select HAVE_PERF_COUNTERS if (!M386 && !M486)
config X86_IO_APIC
def_bool y

View file

@ -825,10 +825,11 @@ ia32_sys_call_table:
.quad compat_sys_signalfd4
.quad sys_eventfd2
.quad sys_epoll_create1
.quad sys_dup3 /* 330 */
.quad sys_dup3 /* 330 */
.quad sys_pipe2
.quad sys_inotify_init1
.quad compat_sys_preadv
.quad compat_sys_pwritev
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_counter_open
ia32_syscall_end:

View file

@ -247,5 +247,241 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
#define smp_mb__before_atomic_inc() barrier()
#define smp_mb__after_atomic_inc() barrier()
/* An 64bit atomic type */
typedef struct {
unsigned long long counter;
} atomic64_t;
#define ATOMIC64_INIT(val) { (val) }
/**
* atomic64_read - read atomic64 variable
* @v: pointer of type atomic64_t
*
* Atomically reads the value of @v.
* Doesn't imply a read memory barrier.
*/
#define __atomic64_read(ptr) ((ptr)->counter)
static inline unsigned long long
cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
{
asm volatile(
LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
: "=A" (old)
: [ptr] "D" (ptr),
"A" (old),
"b" (ll_low(new)),
"c" (ll_high(new))
: "memory");
return old;
}
static inline unsigned long long
atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
unsigned long long new_val)
{
return cmpxchg8b(&ptr->counter, old_val, new_val);
}
/**
* atomic64_xchg - xchg atomic64 variable
* @ptr: pointer to type atomic64_t
* @new_val: value to assign
* @old_val: old value that was there
*
* Atomically xchgs the value of @ptr to @new_val and returns
* the old value.
*/
static inline unsigned long long
atomic64_xchg(atomic64_t *ptr, unsigned long long new_val)
{
unsigned long long old_val;
do {
old_val = atomic_read(ptr);
} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
return old_val;
}
/**
* atomic64_set - set atomic64 variable
* @ptr: pointer to type atomic64_t
* @new_val: value to assign
*
* Atomically sets the value of @ptr to @new_val.
*/
static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
{
atomic64_xchg(ptr, new_val);
}
/**
* atomic64_read - read atomic64 variable
* @ptr: pointer to type atomic64_t
*
* Atomically reads the value of @ptr and returns it.
*/
static inline unsigned long long atomic64_read(atomic64_t *ptr)
{
unsigned long long curr_val;
do {
curr_val = __atomic64_read(ptr);
} while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
return curr_val;
}
/**
* atomic64_add_return - add and return
* @delta: integer value to add
* @ptr: pointer to type atomic64_t
*
* Atomically adds @delta to @ptr and returns @delta + *@ptr
*/
static inline unsigned long long
atomic64_add_return(unsigned long long delta, atomic64_t *ptr)
{
unsigned long long old_val, new_val;
do {
old_val = atomic_read(ptr);
new_val = old_val + delta;
} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
return new_val;
}
static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
{
return atomic64_add_return(-delta, ptr);
}
static inline long atomic64_inc_return(atomic64_t *ptr)
{
return atomic64_add_return(1, ptr);
}
static inline long atomic64_dec_return(atomic64_t *ptr)
{
return atomic64_sub_return(1, ptr);
}
/**
* atomic64_add - add integer to atomic64 variable
* @delta: integer value to add
* @ptr: pointer to type atomic64_t
*
* Atomically adds @delta to @ptr.
*/
static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
{
atomic64_add_return(delta, ptr);
}
/**
* atomic64_sub - subtract the atomic64 variable
* @delta: integer value to subtract
* @ptr: pointer to type atomic64_t
*
* Atomically subtracts @delta from @ptr.
*/
static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
{
atomic64_add(-delta, ptr);
}
/**
* atomic64_sub_and_test - subtract value from variable and test result
* @delta: integer value to subtract
* @ptr: pointer to type atomic64_t
*
* Atomically subtracts @delta from @ptr and returns
* true if the result is zero, or false for all
* other cases.
*/
static inline int
atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
{
unsigned long long old_val = atomic64_sub_return(delta, ptr);
return old_val == 0;
}
/**
* atomic64_inc - increment atomic64 variable
* @ptr: pointer to type atomic64_t
*
* Atomically increments @ptr by 1.
*/
static inline void atomic64_inc(atomic64_t *ptr)
{
atomic64_add(1, ptr);
}
/**
* atomic64_dec - decrement atomic64 variable
* @ptr: pointer to type atomic64_t
*
* Atomically decrements @ptr by 1.
*/
static inline void atomic64_dec(atomic64_t *ptr)
{
atomic64_sub(1, ptr);
}
/**
* atomic64_dec_and_test - decrement and test
* @ptr: pointer to type atomic64_t
*
* Atomically decrements @ptr by 1 and
* returns true if the result is 0, or false for all other
* cases.
*/
static inline int atomic64_dec_and_test(atomic64_t *ptr)
{
return atomic64_sub_and_test(1, ptr);
}
/**
* atomic64_inc_and_test - increment and test
* @ptr: pointer to type atomic64_t
*
* Atomically increments @ptr by 1
* and returns true if the result is zero, or false for all
* other cases.
*/
static inline int atomic64_inc_and_test(atomic64_t *ptr)
{
return atomic64_sub_and_test(-1, ptr);
}
/**
* atomic64_add_negative - add and test if negative
* @delta: integer value to add
* @ptr: pointer to type atomic64_t
*
* Atomically adds @delta to @ptr and returns true
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
static inline int
atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
{
long long old_val = atomic64_add_return(delta, ptr);
return old_val < 0;
}
#include <asm-generic/atomic.h>
#endif /* _ASM_X86_ATOMIC_32_H */

View file

@ -49,7 +49,7 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
#ifdef CONFIG_PERF_COUNTERS
BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
#endif
#ifdef CONFIG_X86_MCE_P4THERMAL

View file

@ -13,6 +13,8 @@ typedef struct {
unsigned int irq_spurious_count;
#endif
unsigned int generic_irqs; /* arch dependent */
unsigned int apic_perf_irqs;
unsigned int apic_pending_irqs;
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;

View file

@ -29,6 +29,8 @@
extern void apic_timer_interrupt(void);
extern void generic_interrupt(void);
extern void error_interrupt(void);
extern void perf_pending_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
extern void reschedule_interrupt(void);

View file

@ -1,31 +0,0 @@
#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
#define _ASM_X86_INTEL_ARCH_PERFMON_H
#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
union cpuid10_eax {
struct {
unsigned int version_id:8;
unsigned int num_counters:8;
unsigned int bit_width:8;
unsigned int mask_length:8;
} split;
unsigned int full;
};
#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */

View file

@ -107,16 +107,16 @@
*/
#define LOCAL_TIMER_VECTOR 0xef
/*
* Performance monitoring interrupt vector:
*/
#define LOCAL_PERF_VECTOR 0xee
/*
* Generic system vector for platform specific use
*/
#define GENERIC_INTERRUPT_VECTOR 0xed
/*
* Performance monitoring pending work vector:
*/
#define LOCAL_PENDING_VECTOR 0xec
/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
* start at 0x31(0x41) to spread out vectors evenly between priority

View file

@ -0,0 +1,100 @@
#ifndef _ASM_X86_PERF_COUNTER_H
#define _ASM_X86_PERF_COUNTER_H
/*
* Performance counter hw details:
*/
#define X86_PMC_MAX_GENERIC 8
#define X86_PMC_MAX_FIXED 3
#define X86_PMC_IDX_GENERIC 0
#define X86_PMC_IDX_FIXED 32
#define X86_PMC_IDX_MAX 64
#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
/*
* Includes eventsel and unit mask as well:
*/
#define ARCH_PERFMON_EVENT_MASK 0xffff
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
/*
* Intel "Architectural Performance Monitoring" CPUID
* detection/enumeration details:
*/
union cpuid10_eax {
struct {
unsigned int version_id:8;
unsigned int num_counters:8;
unsigned int bit_width:8;
unsigned int mask_length:8;
} split;
unsigned int full;
};
union cpuid10_edx {
struct {
unsigned int num_counters_fixed:4;
unsigned int reserved:28;
} split;
unsigned int full;
};
/*
* Fixed-purpose performance counters:
*/
/*
* All 3 fixed-mode PMCs are configured via this single MSR:
*/
#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
/*
* The counts are available in three separate MSRs:
*/
/* Instr_Retired.Any: */
#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
/* CPU_CLK_Unhalted.Core: */
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
/* CPU_CLK_Unhalted.Ref: */
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
extern void set_perf_counter_pending(void);
#define clear_perf_counter_pending() do { } while (0)
#define test_perf_counter_pending() (0)
#ifdef CONFIG_PERF_COUNTERS
extern void init_hw_perf_counters(void);
extern void perf_counters_lapic_init(void);
#else
static inline void init_hw_perf_counters(void) { }
static inline void perf_counters_lapic_init(void) { }
#endif
#endif /* _ASM_X86_PERF_COUNTER_H */

View file

@ -341,6 +341,7 @@
#define __NR_preadv 333
#define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_counter_open 336
#ifdef __KERNEL__

View file

@ -659,7 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv)
__SYSCALL(__NR_pwritev, sys_pwritev)
#define __NR_rt_tgsigqueueinfo 297
__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
#define __NR_perf_counter_open 298
__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR

View file

@ -14,6 +14,7 @@
* Mikael Pettersson : PM converted to driver model.
*/
#include <linux/perf_counter.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
@ -34,6 +35,7 @@
#include <linux/smp.h>
#include <linux/mm.h>
#include <asm/perf_counter.h>
#include <asm/pgalloc.h>
#include <asm/atomic.h>
#include <asm/mpspec.h>
@ -1187,6 +1189,7 @@ void __cpuinit setup_local_APIC(void)
apic_write(APIC_ESR, 0);
}
#endif
perf_counters_lapic_init();
preempt_disable();

View file

@ -1,5 +1,5 @@
#
# Makefile for x86-compatible CPU details and quirks
# Makefile for x86-compatible CPU details, features and quirks
#
# Don't trace early stages of a secondary CPU boot
@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@

View file

@ -13,6 +13,7 @@
#include <linux/io.h>
#include <asm/stackprotector.h>
#include <asm/perf_counter.h>
#include <asm/mmu_context.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@ -874,6 +875,7 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
init_hw_perf_counters();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)

File diff suppressed because it is too large Load diff

View file

@ -19,8 +19,8 @@
#include <linux/nmi.h>
#include <linux/kprobes.h>
#include <asm/genapic.h>
#include <asm/intel_arch_perfmon.h>
#include <asm/apic.h>
#include <asm/perf_counter.h>
struct nmi_watchdog_ctlblk {
unsigned int cccr_msr;

View file

@ -1012,6 +1012,11 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
#ifdef CONFIG_PERF_COUNTERS
apicinterrupt LOCAL_PENDING_VECTOR \
perf_pending_interrupt smp_perf_pending_interrupt
#endif
/*
* Exception entry points.
*/

View file

@ -62,6 +62,14 @@ static int show_other_interrupts(struct seq_file *p, int prec)
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
seq_printf(p, " Spurious interrupts\n");
seq_printf(p, "%*s: ", prec, "CNT");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
seq_printf(p, " Performance counter interrupts\n");
seq_printf(p, "%*s: ", prec, "PND");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
seq_printf(p, " Performance pending work\n");
#endif
if (generic_interrupt_extension) {
seq_printf(p, "%*s: ", prec, "PLT");
@ -165,6 +173,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#ifdef CONFIG_X86_LOCAL_APIC
sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_pending_irqs;
#endif
if (generic_interrupt_extension)
sum += irq_stats(cpu)->generic_irqs;

View file

@ -181,10 +181,15 @@ static void __init apic_intr_init(void)
{
smp_intr_init();
#ifdef CONFIG_X86_64
#ifdef CONFIG_X86_THERMAL_VECTOR
alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
#endif
#ifdef CONFIG_X86_THRESHOLD
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif
#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
#endif
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
/* self generated IPI for local APIC timer */
@ -199,17 +204,9 @@ static void __init apic_intr_init(void)
/* Performance monitoring interrupts: */
# ifdef CONFIG_PERF_COUNTERS
alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
# endif
#endif
#ifdef CONFIG_X86_32
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
/* thermal monitor LVT interrupt */
alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
#endif
#endif
}

View file

@ -6,7 +6,6 @@
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
* 2000-2002 x86-64 support by Andi Kleen
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>

View file

@ -335,3 +335,4 @@ ENTRY(sys_call_table)
.long sys_preadv
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_counter_open

View file

@ -942,8 +942,13 @@ void __init trap_init(void)
#endif
set_intr_gate(19, &simd_coprocessor_error);
/* Reserve all the builtin and the syscall vector: */
for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
set_bit(i, used_vectors);
#ifdef CONFIG_IA32_EMULATION
set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
#ifdef CONFIG_X86_32
@ -960,14 +965,9 @@ void __init trap_init(void)
}
set_system_trap_gate(SYSCALL_VECTOR, &system_call);
set_bit(SYSCALL_VECTOR, used_vectors);
#endif
/* Reserve all the builtin and the syscall vector: */
for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
set_bit(i, used_vectors);
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
/*
* Should be a barrier for any external CPU state:
*/

View file

@ -10,6 +10,7 @@
#include <linux/bootmem.h> /* max_low_pfn */
#include <linux/kprobes.h> /* __kprobes, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
#include <linux/perf_counter.h> /* perf_swcounter_event */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@ -1013,6 +1014,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
/*
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
@ -1106,10 +1109,15 @@ good_area:
return;
}
if (fault & VM_FAULT_MAJOR)
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
else
perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
regs, address);
} else {
tsk->min_flt++;
perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
check_v8086_mode(regs, address, tsk);

View file

@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self,
switch (val) {
case DIE_NMI:
if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
ret = NOTIFY_STOP;
case DIE_NMI_IPI:
model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
ret = NOTIFY_STOP;
break;
default:
break;
@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy)
static struct notifier_block profile_exceptions_nb = {
.notifier_call = profile_exceptions_notify,
.next = NULL,
.priority = 0
.priority = 2
};
static int nmi_setup(void)

View file

@ -18,7 +18,7 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include <asm/intel_arch_perfmon.h>
#include <asm/perf_counter.h>
#include "op_x86_model.h"
#include "op_counter.h"
@ -136,6 +136,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
u64 val;
int i;
/*
* This can happen if perf counters are in use when
* we steal the die notifier NMI.
*/
if (unlikely(!reset_value))
goto out;
for (i = 0 ; i < num_counters; ++i) {
if (!reset_value[i])
continue;
@ -146,6 +153,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
}
}
out:
/* Only P6 based Pentium M need to re-unmask the apic vector but it
* doesn't hurt other P6 variant */
apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

View file

@ -338,6 +338,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
}
}
current->mm->context.vdso = (void *)addr;
if (compat_uses_vma || !compat) {
/*
* MAYWRITE to allow gdb to COW and set breakpoints
@ -358,11 +360,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
goto up_fail;
}
current->mm->context.vdso = (void *)addr;
current_thread_info()->sysenter_return =
VDSO32_SYMBOL(addr, SYSENTER_RETURN);
up_fail:
if (ret)
current->mm->context.vdso = NULL;
up_write(&mm->mmap_sem);
return ret;

View file

@ -116,15 +116,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
goto up_fail;
}
current->mm->context.vdso = (void *)addr;
ret = install_special_mapping(mm, addr, vdso_size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
VM_ALWAYSDUMP,
vdso_pages);
if (ret)
if (ret) {
current->mm->context.vdso = NULL;
goto up_fail;
}
current->mm->context.vdso = (void *)addr;
up_fail:
up_write(&mm->mmap_sem);
return ret;

View file

@ -25,6 +25,7 @@
#include <linux/kbd_kern.h>
#include <linux/proc_fs.h>
#include <linux/quotaops.h>
#include <linux/perf_counter.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/suspend.h>
@ -243,6 +244,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty)
struct pt_regs *regs = get_irq_regs();
if (regs)
show_regs(regs);
perf_counter_print_debug();
}
static struct sysrq_key_op sysrq_showregs_op = {
.handler = sysrq_handle_showregs,

View file

@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/perf_counter.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/key.h>
@ -922,6 +923,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
task_lock(tsk);
strlcpy(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
perf_counter_comm(tsk);
}
int flush_old_exec(struct linux_binprm * bprm)
@ -990,6 +992,13 @@ int flush_old_exec(struct linux_binprm * bprm)
current->personality &= ~bprm->per_clear;
/*
* Flush performance counters when crossing a
* security domain:
*/
if (!get_dumpable(current->mm))
perf_counter_exit_task(current);
/* An exec changes our domain. We are no longer part of the thread
group */

View file

@ -134,7 +134,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
#define atomic_long_cmpxchg(l, old, new) \
(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
#define atomic_long_xchg(v, new) \
(atomic64_xchg((atomic64_t *)(l), (new)))
(atomic64_xchg((atomic64_t *)(v), (new)))
#else /* BITS_PER_LONG == 64 */

View file

@ -108,6 +108,15 @@ extern struct group_info init_groups;
extern struct cred init_cred;
#ifdef CONFIG_PERF_COUNTERS
# define INIT_PERF_COUNTERS(tsk) \
.perf_counter_mutex = \
__MUTEX_INITIALIZER(tsk.perf_counter_mutex), \
.perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list),
#else
# define INIT_PERF_COUNTERS(tsk)
#endif
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@ -171,6 +180,7 @@ extern struct cred init_cred;
}, \
.dirties = INIT_PROP_LOCAL_SINGLE(dirties), \
INIT_IDS \
INIT_PERF_COUNTERS(tsk) \
INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \
INIT_FTRACE_GRAPH \

View file

@ -81,7 +81,12 @@ static inline unsigned int kstat_irqs(unsigned int irq)
return sum;
}
/*
* Lock/unlock the current runqueue - to extract task statistics:
*/
extern unsigned long long task_delta_exec(struct task_struct *);
extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
extern void account_steal_time(cputime_t);

View file

@ -0,0 +1,697 @@
/*
* Performance counters:
*
* Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
*
* Data type definitions, declarations, prototypes.
*
* Started by: Thomas Gleixner and Ingo Molnar
*
* For licencing details see kernel-base/COPYING
*/
#ifndef _LINUX_PERF_COUNTER_H
#define _LINUX_PERF_COUNTER_H
#include <linux/types.h>
#include <linux/ioctl.h>
#include <asm/byteorder.h>
/*
* User-space ABI bits:
*/
/*
* attr.type
*/
enum perf_type_id {
PERF_TYPE_HARDWARE = 0,
PERF_TYPE_SOFTWARE = 1,
PERF_TYPE_TRACEPOINT = 2,
PERF_TYPE_HW_CACHE = 3,
PERF_TYPE_RAW = 4,
PERF_TYPE_MAX, /* non-ABI */
};
/*
* Generalized performance counter event types, used by the
* attr.event_id parameter of the sys_perf_counter_open()
* syscall:
*/
enum perf_hw_id {
/*
* Common hardware events, generalized by the kernel:
*/
PERF_COUNT_HW_CPU_CYCLES = 0,
PERF_COUNT_HW_INSTRUCTIONS = 1,
PERF_COUNT_HW_CACHE_REFERENCES = 2,
PERF_COUNT_HW_CACHE_MISSES = 3,
PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
PERF_COUNT_HW_BRANCH_MISSES = 5,
PERF_COUNT_HW_BUS_CYCLES = 6,
PERF_COUNT_HW_MAX, /* non-ABI */
};
/*
* Generalized hardware cache counters:
*
* { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
* { read, write, prefetch } x
* { accesses, misses }
*/
enum perf_hw_cache_id {
PERF_COUNT_HW_CACHE_L1D = 0,
PERF_COUNT_HW_CACHE_L1I = 1,
PERF_COUNT_HW_CACHE_LL = 2,
PERF_COUNT_HW_CACHE_DTLB = 3,
PERF_COUNT_HW_CACHE_ITLB = 4,
PERF_COUNT_HW_CACHE_BPU = 5,
PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
};
enum perf_hw_cache_op_id {
PERF_COUNT_HW_CACHE_OP_READ = 0,
PERF_COUNT_HW_CACHE_OP_WRITE = 1,
PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
};
enum perf_hw_cache_op_result_id {
PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
};
/*
* Special "software" counters provided by the kernel, even if the hardware
* does not support performance counters. These counters measure various
* physical and sw events of the kernel (and allow the profiling of them as
* well):
*/
enum perf_sw_ids {
PERF_COUNT_SW_CPU_CLOCK = 0,
PERF_COUNT_SW_TASK_CLOCK = 1,
PERF_COUNT_SW_PAGE_FAULTS = 2,
PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
PERF_COUNT_SW_MAX, /* non-ABI */
};
/*
* Bits that can be set in attr.sample_type to request information
* in the overflow packets.
*/
enum perf_counter_sample_format {
PERF_SAMPLE_IP = 1U << 0,
PERF_SAMPLE_TID = 1U << 1,
PERF_SAMPLE_TIME = 1U << 2,
PERF_SAMPLE_ADDR = 1U << 3,
PERF_SAMPLE_GROUP = 1U << 4,
PERF_SAMPLE_CALLCHAIN = 1U << 5,
PERF_SAMPLE_ID = 1U << 6,
PERF_SAMPLE_CPU = 1U << 7,
PERF_SAMPLE_PERIOD = 1U << 8,
};
/*
* Bits that can be set in attr.read_format to request that
* reads on the counter should return the indicated quantities,
* in increasing order of bit value, after the counter value.
*/
enum perf_counter_read_format {
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
};
/*
* Hardware event to monitor via a performance monitoring counter:
*/
struct perf_counter_attr {
/*
* Major type: hardware/software/tracepoint/etc.
*/
__u32 type;
__u32 __reserved_1;
/*
* Type specific configuration information.
*/
__u64 config;
union {
__u64 sample_period;
__u64 sample_freq;
};
__u64 sample_type;
__u64 read_format;
__u64 disabled : 1, /* off by default */
inherit : 1, /* children inherit it */
pinned : 1, /* must always be on PMU */
exclusive : 1, /* only group on PMU */
exclude_user : 1, /* don't count user */
exclude_kernel : 1, /* ditto kernel */
exclude_hv : 1, /* ditto hypervisor */
exclude_idle : 1, /* don't count when idle */
mmap : 1, /* include mmap data */
comm : 1, /* include comm data */
freq : 1, /* use freq, not period */
__reserved_2 : 53;
__u32 wakeup_events; /* wakeup every n events */
__u32 __reserved_3;
__u64 __reserved_4;
};
/*
* Ioctls that can be done on a perf counter fd:
*/
#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0)
#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1)
#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2)
#define PERF_COUNTER_IOC_RESET _IO ('$', 3)
#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64)
enum perf_counter_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
};
/*
* Structure of the page that can be mapped via mmap
*/
struct perf_counter_mmap_page {
__u32 version; /* version number of this structure */
__u32 compat_version; /* lowest version this is compat with */
/*
* Bits needed to read the hw counters in user-space.
*
* u32 seq;
* s64 count;
*
* do {
* seq = pc->lock;
*
* barrier()
* if (pc->index) {
* count = pmc_read(pc->index - 1);
* count += pc->offset;
* } else
* goto regular_read;
*
* barrier();
* } while (pc->lock != seq);
*
* NOTE: for obvious reason this only works on self-monitoring
* processes.
*/
__u32 lock; /* seqlock for synchronization */
__u32 index; /* hardware counter identifier */
__s64 offset; /* add to hardware counter value */
/*
* Control data for the mmap() data buffer.
*
* User-space reading this value should issue an rmb(), on SMP capable
* platforms, after reading this value -- see perf_counter_wakeup().
*/
__u64 data_head; /* head in the data section */
};
#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0)
#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0)
#define PERF_EVENT_MISC_KERNEL (1 << 0)
#define PERF_EVENT_MISC_USER (2 << 0)
#define PERF_EVENT_MISC_HYPERVISOR (3 << 0)
#define PERF_EVENT_MISC_OVERFLOW (1 << 2)
struct perf_event_header {
__u32 type;
__u16 misc;
__u16 size;
};
enum perf_event_type {
/*
* The MMAP events record the PROT_EXEC mappings so that we can
* correlate userspace IPs to code. They have the following structure:
*
* struct {
* struct perf_event_header header;
*
* u32 pid, tid;
* u64 addr;
* u64 len;
* u64 pgoff;
* char filename[];
* };
*/
PERF_EVENT_MMAP = 1,
/*
* struct {
* struct perf_event_header header;
*
* u32 pid, tid;
* char comm[];
* };
*/
PERF_EVENT_COMM = 3,
/*
* struct {
* struct perf_event_header header;
* u64 time;
* u64 id;
* u64 sample_period;
* };
*/
PERF_EVENT_PERIOD = 4,
/*
* struct {
* struct perf_event_header header;
* u64 time;
* u64 id;
* };
*/
PERF_EVENT_THROTTLE = 5,
PERF_EVENT_UNTHROTTLE = 6,
/*
* struct {
* struct perf_event_header header;
* u32 pid, ppid;
* };
*/
PERF_EVENT_FORK = 7,
/*
* When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
* will be PERF_RECORD_*
*
* struct {
* struct perf_event_header header;
*
* { u64 ip; } && PERF_RECORD_IP
* { u32 pid, tid; } && PERF_RECORD_TID
* { u64 time; } && PERF_RECORD_TIME
* { u64 addr; } && PERF_RECORD_ADDR
* { u64 config; } && PERF_RECORD_CONFIG
* { u32 cpu, res; } && PERF_RECORD_CPU
*
* { u64 nr;
* { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP
*
* { u16 nr,
* hv,
* kernel,
* user;
* u64 ips[nr]; } && PERF_RECORD_CALLCHAIN
* };
*/
};
#ifdef __KERNEL__
/*
* Kernel-internal data types and definitions:
*/
#ifdef CONFIG_PERF_COUNTERS
# include <asm/perf_counter.h>
#endif
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/pid_namespace.h>
#include <asm/atomic.h>
struct task_struct;
/**
* struct hw_perf_counter - performance counter hardware details:
*/
struct hw_perf_counter {
#ifdef CONFIG_PERF_COUNTERS
union {
struct { /* hardware */
u64 config;
unsigned long config_base;
unsigned long counter_base;
int idx;
};
union { /* software */
atomic64_t count;
struct hrtimer hrtimer;
};
};
atomic64_t prev_count;
u64 sample_period;
u64 last_period;
atomic64_t period_left;
u64 interrupts;
u64 freq_count;
u64 freq_interrupts;
u64 freq_stamp;
#endif
};
struct perf_counter;
/**
* struct pmu - generic performance monitoring unit
*/
struct pmu {
int (*enable) (struct perf_counter *counter);
void (*disable) (struct perf_counter *counter);
void (*read) (struct perf_counter *counter);
void (*unthrottle) (struct perf_counter *counter);
};
/**
* enum perf_counter_active_state - the states of a counter
*/
enum perf_counter_active_state {
PERF_COUNTER_STATE_ERROR = -2,
PERF_COUNTER_STATE_OFF = -1,
PERF_COUNTER_STATE_INACTIVE = 0,
PERF_COUNTER_STATE_ACTIVE = 1,
};
struct file;
struct perf_mmap_data {
struct rcu_head rcu_head;
int nr_pages; /* nr of data pages */
int nr_locked; /* nr pages mlocked */
atomic_t poll; /* POLL_ for wakeups */
atomic_t events; /* event limit */
atomic_long_t head; /* write position */
atomic_long_t done_head; /* completed head */
atomic_t lock; /* concurrent writes */
atomic_t wakeup; /* needs a wakeup */
struct perf_counter_mmap_page *user_page;
void *data_pages[0];
};
struct perf_pending_entry {
struct perf_pending_entry *next;
void (*func)(struct perf_pending_entry *);
};
/**
* struct perf_counter - performance counter kernel representation:
*/
struct perf_counter {
#ifdef CONFIG_PERF_COUNTERS
struct list_head list_entry;
struct list_head event_entry;
struct list_head sibling_list;
int nr_siblings;
struct perf_counter *group_leader;
const struct pmu *pmu;
enum perf_counter_active_state state;
atomic64_t count;
/*
* These are the total time in nanoseconds that the counter
* has been enabled (i.e. eligible to run, and the task has
* been scheduled in, if this is a per-task counter)
* and running (scheduled onto the CPU), respectively.
*
* They are computed from tstamp_enabled, tstamp_running and
* tstamp_stopped when the counter is in INACTIVE or ACTIVE state.
*/
u64 total_time_enabled;
u64 total_time_running;
/*
* These are timestamps used for computing total_time_enabled
* and total_time_running when the counter is in INACTIVE or
* ACTIVE state, measured in nanoseconds from an arbitrary point
* in time.
* tstamp_enabled: the notional time when the counter was enabled
* tstamp_running: the notional time when the counter was scheduled on
* tstamp_stopped: in INACTIVE state, the notional time when the
* counter was scheduled off.
*/
u64 tstamp_enabled;
u64 tstamp_running;
u64 tstamp_stopped;
struct perf_counter_attr attr;
struct hw_perf_counter hw;
struct perf_counter_context *ctx;
struct file *filp;
/*
* These accumulate total time (in nanoseconds) that children
* counters have been enabled and running, respectively.
*/
atomic64_t child_total_time_enabled;
atomic64_t child_total_time_running;
/*
* Protect attach/detach and child_list:
*/
struct mutex child_mutex;
struct list_head child_list;
struct perf_counter *parent;
int oncpu;
int cpu;
struct list_head owner_entry;
struct task_struct *owner;
/* mmap bits */
struct mutex mmap_mutex;
atomic_t mmap_count;
struct perf_mmap_data *data;
/* poll related */
wait_queue_head_t waitq;
struct fasync_struct *fasync;
/* delayed work for NMIs and such */
int pending_wakeup;
int pending_kill;
int pending_disable;
struct perf_pending_entry pending;
atomic_t event_limit;
void (*destroy)(struct perf_counter *);
struct rcu_head rcu_head;
struct pid_namespace *ns;
u64 id;
#endif
};
/**
* struct perf_counter_context - counter context structure
*
* Used as a container for task counters and CPU counters as well:
*/
struct perf_counter_context {
/*
* Protect the states of the counters in the list,
* nr_active, and the list:
*/
spinlock_t lock;
/*
* Protect the list of counters. Locking either mutex or lock
* is sufficient to ensure the list doesn't change; to change
* the list you need to lock both the mutex and the spinlock.
*/
struct mutex mutex;
struct list_head counter_list;
struct list_head event_list;
int nr_counters;
int nr_active;
int is_active;
atomic_t refcount;
struct task_struct *task;
/*
* Context clock, runs when context enabled.
*/
u64 time;
u64 timestamp;
/*
* These fields let us detect when two contexts have both
* been cloned (inherited) from a common ancestor.
*/
struct perf_counter_context *parent_ctx;
u64 parent_gen;
u64 generation;
int pin_count;
struct rcu_head rcu_head;
};
/**
* struct perf_counter_cpu_context - per cpu counter context structure
*/
struct perf_cpu_context {
struct perf_counter_context ctx;
struct perf_counter_context *task_ctx;
int active_oncpu;
int max_pertask;
int exclusive;
/*
* Recursion avoidance:
*
* task, softirq, irq, nmi context
*/
int recursion[4];
};
#ifdef CONFIG_PERF_COUNTERS
/*
* Set by architecture code:
*/
extern int perf_max_counters;
extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);
extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
extern void perf_counter_task_sched_out(struct task_struct *task,
struct task_struct *next, int cpu);
extern void perf_counter_task_tick(struct task_struct *task, int cpu);
extern int perf_counter_init_task(struct task_struct *child);
extern void perf_counter_exit_task(struct task_struct *child);
extern void perf_counter_free_task(struct task_struct *task);
extern void perf_counter_do_pending(void);
extern void perf_counter_print_debug(void);
extern void __perf_disable(void);
extern bool __perf_enable(void);
extern void perf_disable(void);
extern void perf_enable(void);
extern int perf_counter_task_disable(void);
extern int perf_counter_task_enable(void);
extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
struct perf_cpu_context *cpuctx,
struct perf_counter_context *ctx, int cpu);
extern void perf_counter_update_userpage(struct perf_counter *counter);
struct perf_sample_data {
struct pt_regs *regs;
u64 addr;
u64 period;
};
extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
struct perf_sample_data *data);
/*
* Return 1 for a software counter, 0 for a hardware counter
*/
static inline int is_software_counter(struct perf_counter *counter)
{
return (counter->attr.type != PERF_TYPE_RAW) &&
(counter->attr.type != PERF_TYPE_HARDWARE);
}
extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
extern void __perf_counter_mmap(struct vm_area_struct *vma);
static inline void perf_counter_mmap(struct vm_area_struct *vma)
{
if (vma->vm_flags & VM_EXEC)
__perf_counter_mmap(vma);
}
extern void perf_counter_comm(struct task_struct *tsk);
extern void perf_counter_fork(struct task_struct *tsk);
extern void perf_counter_task_migration(struct task_struct *task, int cpu);
#define MAX_STACK_DEPTH 255
struct perf_callchain_entry {
u16 nr;
u16 hv;
u16 kernel;
u16 user;
u64 ip[MAX_STACK_DEPTH];
};
extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
extern int sysctl_perf_counter_paranoid;
extern int sysctl_perf_counter_mlock;
extern int sysctl_perf_counter_sample_rate;
extern void perf_counter_init(void);
#ifndef perf_misc_flags
#define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \
PERF_EVENT_MISC_KERNEL)
#define perf_instruction_pointer(regs) instruction_pointer(regs)
#endif
#else
static inline void
perf_counter_task_sched_in(struct task_struct *task, int cpu) { }
static inline void
perf_counter_task_sched_out(struct task_struct *task,
struct task_struct *next, int cpu) { }
static inline void
perf_counter_task_tick(struct task_struct *task, int cpu) { }
static inline int perf_counter_init_task(struct task_struct *child) { return 0; }
static inline void perf_counter_exit_task(struct task_struct *child) { }
static inline void perf_counter_free_task(struct task_struct *task) { }
static inline void perf_counter_do_pending(void) { }
static inline void perf_counter_print_debug(void) { }
static inline void perf_disable(void) { }
static inline void perf_enable(void) { }
static inline int perf_counter_task_disable(void) { return -EINVAL; }
static inline int perf_counter_task_enable(void) { return -EINVAL; }
static inline void
perf_swcounter_event(u32 event, u64 nr, int nmi,
struct pt_regs *regs, u64 addr) { }
static inline void perf_counter_mmap(struct vm_area_struct *vma) { }
static inline void perf_counter_comm(struct task_struct *tsk) { }
static inline void perf_counter_fork(struct task_struct *tsk) { }
static inline void perf_counter_init(void) { }
static inline void perf_counter_task_migration(struct task_struct *task,
int cpu) { }
#endif
#endif /* __KERNEL__ */
#endif /* _LINUX_PERF_COUNTER_H */

View file

@ -85,4 +85,7 @@
#define PR_SET_TIMERSLACK 29
#define PR_GET_TIMERSLACK 30
#define PR_TASK_PERF_COUNTERS_DISABLE 31
#define PR_TASK_PERF_COUNTERS_ENABLE 32
#endif /* _LINUX_PRCTL_H */

View file

@ -99,6 +99,7 @@ struct robust_list_head;
struct bio;
struct fs_struct;
struct bts_context;
struct perf_counter_context;
/*
* List of flags we want to share for kernel threads,
@ -139,6 +140,7 @@ extern unsigned long nr_running(void);
extern unsigned long nr_uninterruptible(void);
extern unsigned long nr_iowait(void);
extern void calc_global_load(void);
extern u64 cpu_nr_migrations(int cpu);
extern unsigned long get_parent_ip(unsigned long addr);
@ -674,6 +676,10 @@ struct user_struct {
struct work_struct work;
#endif
#endif
#ifdef CONFIG_PERF_COUNTERS
atomic_long_t locked_vm;
#endif
};
extern int uids_sysfs_init(void);
@ -1073,9 +1079,10 @@ struct sched_entity {
u64 last_wakeup;
u64 avg_overlap;
u64 nr_migrations;
u64 start_runtime;
u64 avg_wakeup;
u64 nr_migrations;
#ifdef CONFIG_SCHEDSTATS
u64 wait_start;
@ -1396,6 +1403,11 @@ struct task_struct {
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_COUNTERS
struct perf_counter_context *perf_counter_ctxp;
struct mutex perf_counter_mutex;
struct list_head perf_counter_list;
#endif
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy;
short il_next;
@ -2410,6 +2422,13 @@ static inline void inc_syscw(struct task_struct *tsk)
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
/*
* Call the function if the target task is executing on a CPU right now:
*/
extern void task_oncpu_function_call(struct task_struct *p,
void (*func) (void *info), void *info);
#ifdef CONFIG_MM_OWNER
extern void mm_update_next_owner(struct mm_struct *mm);
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);

View file

@ -55,6 +55,7 @@ struct compat_timeval;
struct robust_list_head;
struct getcpu_cache;
struct old_linux_dirent;
struct perf_counter_attr;
#include <linux/types.h>
#include <linux/aio_abi.h>
@ -755,4 +756,8 @@ asmlinkage long sys_pipe(int __user *);
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
asmlinkage long sys_perf_counter_open(
const struct perf_counter_attr __user *attr_uptr,
pid_t pid, int cpu, int group_fd, unsigned long flags);
#endif

View file

@ -933,6 +933,40 @@ config AIO
by some high performance threaded applications. Disabling
this option saves about 7k.
config HAVE_PERF_COUNTERS
bool
menu "Performance Counters"
config PERF_COUNTERS
bool "Kernel Performance Counters"
depends on HAVE_PERF_COUNTERS
select ANON_INODES
help
Enable kernel support for performance counter hardware.
Performance counters are special hardware registers available
on most modern CPUs. These registers count the number of certain
types of hw events: such as instructions executed, cachemisses
suffered, or branches mis-predicted - without slowing down the
kernel or applications. These registers can also trigger interrupts
when a threshold number of events have passed - and can thus be
used to profile the code that runs on that CPU.
The Linux Performance Counter subsystem provides an abstraction of
these hardware capabilities, available via a system call. It
provides per task and per CPU counters, and it provides event
capabilities on top of those.
Say Y if unsure.
config EVENT_PROFILE
bool "Tracepoint profile sources"
depends on PERF_COUNTERS && EVENT_TRACER
default y
endmenu
config VM_EVENT_COUNTERS
default y
bool "Enable VM event counters for /proc/vmstat" if EMBEDDED

View file

@ -96,6 +96,7 @@ obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is

View file

@ -48,6 +48,7 @@
#include <linux/tracehook.h>
#include <linux/fs_struct.h>
#include <linux/init_task.h>
#include <linux/perf_counter.h>
#include <trace/events/sched.h>
#include <asm/uaccess.h>
@ -154,6 +155,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
#ifdef CONFIG_PERF_COUNTERS
WARN_ON_ONCE(tsk->perf_counter_ctxp);
#endif
trace_sched_process_free(tsk);
put_task_struct(tsk);
}
@ -170,6 +174,7 @@ repeat:
atomic_dec(&__task_cred(p)->user->processes);
proc_flush_task(p);
write_lock_irq(&tasklist_lock);
tracehook_finish_release_task(p);
__exit_signal(p);
@ -971,16 +976,19 @@ NORET_TYPE void do_exit(long code)
module_put(tsk->binfmt->module);
proc_exit_connector(tsk);
/*
* Flush inherited counters to the parent - before the parent
* gets woken up by child-exit notifications.
*/
perf_counter_exit_task(tsk);
exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
/*
* This must happen late, after the PID is not
* hashed anymore:
*/
if (unlikely(!list_empty(&tsk->pi_state_list)))
exit_pi_state_list(tsk);
if (unlikely(current->pi_state_cache))

View file

@ -62,6 +62,7 @@
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <linux/magic.h>
#include <linux/perf_counter.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@ -1096,6 +1097,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
retval = perf_counter_init_task(p);
if (retval)
goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
goto bad_fork_cleanup_policy;
/* copy all the process information */
@ -1290,6 +1295,7 @@ bad_fork_cleanup_semundo:
bad_fork_cleanup_audit:
audit_free(p);
bad_fork_cleanup_policy:
perf_counter_free_task(p);
#ifdef CONFIG_NUMA
mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
@ -1403,6 +1409,12 @@ long do_fork(unsigned long clone_flags,
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
} else if (!(clone_flags & CLONE_VM)) {
/*
* vfork will do an exec which will call
* set_task_comm()
*/
perf_counter_fork(p);
}
audit_finish_fork(p);

View file

@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count);
*
* This function is similar to (but not equivalent to) down().
*/
void inline __sched mutex_lock(struct mutex *lock)
void __sched mutex_lock(struct mutex *lock)
{
might_sleep();
/*

4260
kernel/perf_counter.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -39,6 +39,7 @@
#include <linux/completion.h>
#include <linux/kernel_stat.h>
#include <linux/debug_locks.h>
#include <linux/perf_counter.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/profile.h>
@ -579,6 +580,7 @@ struct rq {
struct load_weight load;
unsigned long nr_load_updates;
u64 nr_switches;
u64 nr_migrations_in;
struct cfs_rq cfs;
struct rt_rq rt;
@ -691,7 +693,7 @@ static inline int cpu_of(struct rq *rq)
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
static inline void update_rq_clock(struct rq *rq)
inline void update_rq_clock(struct rq *rq)
{
rq->clock = sched_clock_cpu(cpu_of(rq));
}
@ -1968,12 +1970,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->se.sleep_start -= clock_offset;
if (p->se.block_start)
p->se.block_start -= clock_offset;
#endif
if (old_cpu != new_cpu) {
schedstat_inc(p, se.nr_migrations);
p->se.nr_migrations++;
new_rq->nr_migrations_in++;
#ifdef CONFIG_SCHEDSTATS
if (task_hot(p, old_rq->clock, NULL))
schedstat_inc(p, se.nr_forced2_migrations);
}
#endif
perf_counter_task_migration(p, new_cpu);
}
p->se.vruntime -= old_cfsrq->min_vruntime -
new_cfsrq->min_vruntime;
@ -2368,6 +2374,27 @@ static int sched_balance_self(int cpu, int flag)
#endif /* CONFIG_SMP */
/**
* task_oncpu_function_call - call a function on the cpu on which a task runs
* @p: the task to evaluate
* @func: the function to be called
* @info: the function call argument
*
* Calls the function @func when the task is currently running. This might
* be on the current CPU, which just calls the function directly
*/
void task_oncpu_function_call(struct task_struct *p,
void (*func) (void *info), void *info)
{
int cpu;
preempt_disable();
cpu = task_cpu(p);
if (task_curr(p))
smp_call_function_single(cpu, func, info, 1);
preempt_enable();
}
/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
@ -2535,6 +2562,7 @@ static void __sched_fork(struct task_struct *p)
p->se.exec_start = 0;
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.last_wakeup = 0;
p->se.avg_overlap = 0;
p->se.start_runtime = 0;
@ -2765,6 +2793,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
perf_counter_task_sched_in(current, cpu_of(rq));
finish_lock_switch(rq, prev);
#ifdef CONFIG_SMP
if (post_schedule)
@ -2979,6 +3008,15 @@ static void calc_load_account_active(struct rq *this_rq)
}
}
/*
* Externally visible per-cpu scheduler statistics:
* cpu_nr_migrations(cpu) - number of migrations into that cpu
*/
u64 cpu_nr_migrations(int cpu)
{
return cpu_rq(cpu)->nr_migrations_in;
}
/*
* Update rq->cpu_load[] statistics. This function is usually called every
* scheduler tick (TICK_NSEC).
@ -5077,6 +5115,8 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
spin_unlock(&rq->lock);
perf_counter_task_tick(curr, cpu);
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
trigger_load_balance(rq, cpu);
@ -5292,6 +5332,7 @@ need_resched_nonpreemptible:
if (likely(prev != next)) {
sched_info_switch(prev, next);
perf_counter_task_sched_out(prev, next, cpu);
rq->nr_switches++;
rq->curr = next;
@ -7535,8 +7576,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
/* Register at highest priority so that task migration (migrate_all_tasks)
* happens before everything else.
/*
* Register at high priority so that task migration (migrate_all_tasks)
* happens before everything else. This has to be lower priority than
* the notifier in the perf_counter subsystem, though.
*/
static struct notifier_block __cpuinitdata migration_notifier = {
.notifier_call = migration_call,
@ -9214,7 +9257,7 @@ void __init sched_init(void)
* 1024) and two child groups A0 and A1 (of weight 1024 each),
* then A0's share of the cpu resource is:
*
* A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
* A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
*
* We achieve this by letting init_task_group's tasks sit
* directly in rq->cfs (i.e init_task_group->se[] = NULL).
@ -9319,6 +9362,8 @@ void __init sched_init(void)
alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
#endif /* SMP */
perf_counter_init();
scheduler_running = 1;
}

View file

@ -14,6 +14,7 @@
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/perf_counter.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
@ -1793,6 +1794,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_TSC:
error = SET_TSC_CTL(arg2);
break;
case PR_TASK_PERF_COUNTERS_DISABLE:
error = perf_counter_task_disable();
break;
case PR_TASK_PERF_COUNTERS_ENABLE:
error = perf_counter_task_enable();
break;
case PR_GET_TIMERSLACK:
error = current->timer_slack_ns;
break;

View file

@ -175,3 +175,6 @@ cond_syscall(compat_sys_timerfd_settime);
cond_syscall(compat_sys_timerfd_gettime);
cond_syscall(sys_eventfd);
cond_syscall(sys_eventfd2);
/* performance counters: */
cond_syscall(sys_perf_counter_open);

View file

@ -49,6 +49,7 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/slow-work.h>
#include <linux/perf_counter.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@ -932,6 +933,32 @@ static struct ctl_table kern_table[] = {
.child = slow_work_sysctls,
},
#endif
#ifdef CONFIG_PERF_COUNTERS
{
.ctl_name = CTL_UNNUMBERED,
.procname = "perf_counter_paranoid",
.data = &sysctl_perf_counter_paranoid,
.maxlen = sizeof(sysctl_perf_counter_paranoid),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "perf_counter_mlock_kb",
.data = &sysctl_perf_counter_mlock,
.maxlen = sizeof(sysctl_perf_counter_mlock),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "perf_counter_max_sample_rate",
.data = &sysctl_perf_counter_sample_rate,
.maxlen = sizeof(sysctl_perf_counter_sample_rate),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt

View file

@ -37,6 +37,7 @@
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_counter.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@ -1129,6 +1130,8 @@ static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = __get_cpu_var(tvec_bases);
perf_counter_do_pending();
hrtimer_run_pending();
if (time_after_eq(jiffies, base->timer_jiffies))

View file

@ -28,6 +28,7 @@
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/perf_counter.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@ -1222,6 +1223,8 @@ munmap_back:
if (correct_wcount)
atomic_inc(&inode->i_writecount);
out:
perf_counter_mmap(vma);
mm->total_vm += len >> PAGE_SHIFT;
vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
@ -2308,6 +2311,8 @@ int install_special_mapping(struct mm_struct *mm,
mm->total_vm += len >> PAGE_SHIFT;
perf_counter_mmap(vma);
return 0;
}

View file

@ -23,6 +23,7 @@
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_counter.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
@ -299,6 +300,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
if (error)
goto out;
perf_counter_mmap(vma);
nstart = tmp;
if (nstart < prev->vm_end)

16
tools/perf/.gitignore vendored Normal file
View file

@ -0,0 +1,16 @@
PERF-BUILD-OPTIONS
PERF-CFLAGS
PERF-GUI-VARS
PERF-VERSION-FILE
perf
perf-help
perf-record
perf-report
perf-stat
perf-top
perf*.1
perf*.xml
common-cmds.h
tags
TAGS
cscope*

View file

@ -0,0 +1,300 @@
MAN1_TXT= \
$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
$(wildcard perf-*.txt)) \
perf.txt
MAN5_TXT=
MAN7_TXT=
MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
DOC_HTML=$(MAN_HTML)
ARTICLES =
# with their own formatting rules.
SP_ARTICLES =
API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
SP_ARTICLES += $(API_DOCS)
SP_ARTICLES += technical/api-index
DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
prefix?=$(HOME)
bindir?=$(prefix)/bin
htmldir?=$(prefix)/share/doc/perf-doc
pdfdir?=$(prefix)/share/doc/perf-doc
mandir?=$(prefix)/share/man
man1dir=$(mandir)/man1
man5dir=$(mandir)/man5
man7dir=$(mandir)/man7
# DESTDIR=
ASCIIDOC=asciidoc
ASCIIDOC_EXTRA =
MANPAGE_XSL = manpage-normal.xsl
XMLTO_EXTRA =
INSTALL?=install
RM ?= rm -f
DOC_REF = origin/man
HTML_REF = origin/html
infodir?=$(prefix)/share/info
MAKEINFO=makeinfo
INSTALL_INFO=install-info
DOCBOOK2X_TEXI=docbook2x-texi
DBLATEX=dblatex
ifndef PERL_PATH
PERL_PATH = /usr/bin/perl
endif
-include ../config.mak.autogen
-include ../config.mak
#
# For asciidoc ...
# -7.1.2, no extra settings are needed.
# 8.0-, set ASCIIDOC8.
#
#
# For docbook-xsl ...
# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
# 1.69.0, no extra settings are needed?
# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP?
# 1.71.1, no extra settings are needed?
# 1.72.0, set DOCBOOK_XSL_172.
# 1.73.0-, set ASCIIDOC_NO_ROFF
#
#
# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
# of 'the ".ft C" problem' in your generated manpages, and you
# instead ended up with weird characters around callouts, try
# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
#
ifdef ASCIIDOC8
ASCIIDOC_EXTRA += -a asciidoc7compatible
endif
ifdef DOCBOOK_XSL_172
ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
MANPAGE_XSL = manpage-1.72.xsl
else
ifdef ASCIIDOC_NO_ROFF
# docbook-xsl after 1.72 needs the regular XSL, but will not
# pass-thru raw roff codes from asciidoc.conf, so turn them off.
ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
endif
endif
ifdef MAN_BOLD_LITERAL
XMLTO_EXTRA += -m manpage-bold-literal.xsl
endif
ifdef DOCBOOK_SUPPRESS_SP
XMLTO_EXTRA += -m manpage-suppress-sp.xsl
endif
SHELL_PATH ?= $(SHELL)
# Shell quote;
SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
#
# Please note that there is a minor bug in asciidoc.
# The version after 6.0.3 _will_ include the patch found here:
# http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2
#
# Until that version is released you may have to apply the patch
# yourself - yes, all 6 characters of it!
#
QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
QUIET_SUBDIR1 =
ifneq ($(findstring $(MAKEFLAGS),w),w)
PRINT_DIR = --no-print-directory
else # "make -w"
NO_SUBDIR = :
endif
ifneq ($(findstring $(MAKEFLAGS),s),s)
ifndef V
QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@;
QUIET_XMLTO = @echo ' ' XMLTO $@;
QUIET_DB2TEXI = @echo ' ' DB2TEXI $@;
QUIET_MAKEINFO = @echo ' ' MAKEINFO $@;
QUIET_DBLATEX = @echo ' ' DBLATEX $@;
QUIET_XSLTPROC = @echo ' ' XSLTPROC $@;
QUIET_GEN = @echo ' ' GEN $@;
QUIET_STDERR = 2> /dev/null
QUIET_SUBDIR0 = +@subdir=
QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
$(MAKE) $(PRINT_DIR) -C $$subdir
export V
endif
endif
all: html man
html: $(DOC_HTML)
$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
man: man1 man5 man7
man1: $(DOC_MAN1)
man5: $(DOC_MAN5)
man7: $(DOC_MAN7)
info: perf.info perfman.info
pdf: user-manual.pdf
install: install-man
install-man: man
$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
install-info: info
$(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
$(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir)
if test -r $(DESTDIR)$(infodir)/dir; then \
$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
else \
echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
fi
install-pdf: pdf
$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
$(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
install-html: html
'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE
-include ../PERF-VERSION-FILE
#
# Determine "include::" file references in asciidoc files.
#
doc.dep : $(wildcard *.txt) build-docdep.perl
$(QUIET_GEN)$(RM) $@+ $@ && \
$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
mv $@+ $@
-include doc.dep
cmds_txt = cmds-ancillaryinterrogators.txt \
cmds-ancillarymanipulators.txt \
cmds-mainporcelain.txt \
cmds-plumbinginterrogators.txt \
cmds-plumbingmanipulators.txt \
cmds-synchingrepositories.txt \
cmds-synchelpers.txt \
cmds-purehelpers.txt \
cmds-foreignscminterface.txt
$(cmds_txt): cmd-list.made
cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
$(QUIET_GEN)$(RM) $@ && \
$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
date >$@
clean:
$(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7
$(RM) *.texi *.texi+ *.texi++ perf.info perfman.info
$(RM) howto-index.txt howto/*.html doc.dep
$(RM) technical/api-*.html technical/api-index.txt
$(RM) $(cmds_txt) *.made
$(MAN_HTML): %.html : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
mv $@+ $@
%.1 %.5 %.7 : %.xml
$(QUIET_XMLTO)$(RM) $@ && \
xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
mv $@+ $@
XSLT = docbook.xsl
XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
user-manual.html: user-manual.xml
$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
perf.info: user-manual.texi
$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi
user-manual.texi: user-manual.xml
$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
rm $@++ && \
mv $@+ $@
user-manual.pdf: user-manual.xml
$(QUIET_DBLATEX)$(RM) $@+ $@ && \
$(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
mv $@+ $@
perfman.texi: $(MAN_XML) cat-texi.perl
$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
--to-stdout $(xml) &&) true) > $@++ && \
$(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
rm $@++ && \
mv $@+ $@
perfman.info: perfman.texi
$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
mv $@+ $@
howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
$(QUIET_GEN)$(RM) $@+ $@ && \
'$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
mv $@+ $@
$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
$(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
WEBDOC_DEST = /pub/software/tools/perf/docs
$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
mv $@+ $@
install-webdoc : html
'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
quick-install: quick-install-man
quick-install-man:
'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
quick-install-html:
'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
.PHONY: .FORCE-PERF-VERSION-FILE

View file

@ -0,0 +1,91 @@
## linkperf: macro
#
# Usage: linkperf:command[manpage-section]
#
# Note, {0} is the manpage section, while {target} is the command.
#
# Show PERF link as: <command>(<section>); if section is defined, else just show
# the command.
[macros]
(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
[attributes]
asterisk=&#42;
plus=&#43;
caret=&#94;
startsb=&#91;
endsb=&#93;
tilde=&#126;
ifdef::backend-docbook[]
[linkperf-inlinemacro]
{0%{target}}
{0#<citerefentry>}
{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
{0#</citerefentry>}
endif::backend-docbook[]
ifdef::backend-docbook[]
ifndef::perf-asciidoc-no-roff[]
# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
# v1.72 breaks with this because it replaces dots not in roff requests.
[listingblock]
<example><title>{title}</title>
<literallayout>
ifdef::doctype-manpage[]
&#10;.ft C&#10;
endif::doctype-manpage[]
|
ifdef::doctype-manpage[]
&#10;.ft&#10;
endif::doctype-manpage[]
</literallayout>
{title#}</example>
endif::perf-asciidoc-no-roff[]
ifdef::perf-asciidoc-no-roff[]
ifdef::doctype-manpage[]
# The following two small workarounds insert a simple paragraph after screen
[listingblock]
<example><title>{title}</title>
<literallayout>
|
</literallayout><simpara></simpara>
{title#}</example>
[verseblock]
<formalpara{id? id="{id}"}><title>{title}</title><para>
{title%}<literallayout{id? id="{id}"}>
{title#}<literallayout>
|
</literallayout>
{title#}</para></formalpara>
{title%}<simpara></simpara>
endif::doctype-manpage[]
endif::perf-asciidoc-no-roff[]
endif::backend-docbook[]
ifdef::doctype-manpage[]
ifdef::backend-docbook[]
[header]
template::[header-declarations]
<refentry>
<refmeta>
<refentrytitle>{mantitle}</refentrytitle>
<manvolnum>{manvolnum}</manvolnum>
<refmiscinfo class="source">perf</refmiscinfo>
<refmiscinfo class="version">{perf_version}</refmiscinfo>
<refmiscinfo class="manual">perf Manual</refmiscinfo>
</refmeta>
<refnamediv>
<refname>{manname}</refname>
<refpurpose>{manpurpose}</refpurpose>
</refnamediv>
endif::backend-docbook[]
endif::doctype-manpage[]
ifdef::backend-xhtml11[]
[linkperf-inlinemacro]
<a href="{target}.html">{target}{0?({0})}</a>
endif::backend-xhtml11[]

View file

@ -0,0 +1,14 @@
<!-- manpage-1.72.xsl:
special settings for manpages rendered from asciidoc+docbook
handles peculiarities in docbook-xsl 1.72.0 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:import href="manpage-base.xsl"/>
<!-- these are the special values for the roff control characters
needed for docbook-xsl 1.72.0 -->
<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
<xsl:param name="git.docbook.dot" >&#x2302;</xsl:param>
</xsl:stylesheet>

View file

@ -0,0 +1,35 @@
<!-- manpage-base.xsl:
special formatting for manpages rendered from asciidoc+docbook -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- these params silence some output from xmlto -->
<xsl:param name="man.output.quietly" select="1"/>
<xsl:param name="refentry.meta.get.quietly" select="1"/>
<!-- convert asciidoc callouts to man page format;
git.docbook.backslash and git.docbook.dot params
must be supplied by another XSL file or other means -->
<xsl:template match="co">
<xsl:value-of select="concat(
$git.docbook.backslash,'fB(',
substring-after(@id,'-'),')',
$git.docbook.backslash,'fR')"/>
</xsl:template>
<xsl:template match="calloutlist">
<xsl:value-of select="$git.docbook.dot"/>
<xsl:text>sp&#10;</xsl:text>
<xsl:apply-templates/>
<xsl:text>&#10;</xsl:text>
</xsl:template>
<xsl:template match="callout">
<xsl:value-of select="concat(
$git.docbook.backslash,'fB',
substring-after(@arearefs,'-'),
'. ',$git.docbook.backslash,'fR')"/>
<xsl:apply-templates/>
<xsl:value-of select="$git.docbook.dot"/>
<xsl:text>br&#10;</xsl:text>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,17 @@
<!-- manpage-bold-literal.xsl:
special formatting for manpages rendered from asciidoc+docbook -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- render literal text as bold (instead of plain or monospace);
this makes literal text easier to distinguish in manpages
viewed on a tty -->
<xsl:template match="literal">
<xsl:value-of select="$git.docbook.backslash"/>
<xsl:text>fB</xsl:text>
<xsl:apply-templates/>
<xsl:value-of select="$git.docbook.backslash"/>
<xsl:text>fR</xsl:text>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,13 @@
<!-- manpage-normal.xsl:
special settings for manpages rendered from asciidoc+docbook
handles anything we want to keep away from docbook-xsl 1.72.0 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:import href="manpage-base.xsl"/>
<!-- these are the normal values for the roff control characters -->
<xsl:param name="git.docbook.backslash">\</xsl:param>
<xsl:param name="git.docbook.dot" >.</xsl:param>
</xsl:stylesheet>

View file

@ -0,0 +1,21 @@
<!-- manpage-suppress-sp.xsl:
special settings for manpages rendered from asciidoc+docbook
handles erroneous, inline .sp in manpage output of some
versions of docbook-xsl -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- attempt to work around spurious .sp at the tail of the line
that some versions of docbook stylesheets seem to add -->
<xsl:template match="simpara">
<xsl:variable name="content">
<xsl:apply-templates/>
</xsl:variable>
<xsl:value-of select="normalize-space($content)"/>
<xsl:if test="not(ancestor::authorblurb) and
not(ancestor::personblurb)">
<xsl:text>&#10;&#10;</xsl:text>
</xsl:if>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,29 @@
perf-annotate(1)
==============
NAME
----
perf-annotate - Read perf.data (created by perf record) and display annotated code
SYNOPSIS
--------
[verse]
'perf annotate' [-i <file> | --input=file] symbol_name
DESCRIPTION
-----------
This command reads the input file and displays an annotated version of the
code. If the object file has debug symbols then the source code will be
displayed alongside assembly code.
If there is no debug info in the object, then annotated assembly is displayed.
OPTIONS
-------
-i::
--input=::
Input file name. (default: perf.data)
SEE ALSO
--------
linkperf:perf-record[1]

View file

@ -0,0 +1,38 @@
perf-help(1)
============
NAME
----
perf-help - display help information about perf
SYNOPSIS
--------
'perf help' [-a|--all] [COMMAND]
DESCRIPTION
-----------
With no options and no COMMAND given, the synopsis of the 'perf'
command and a list of the most commonly used perf commands are printed
on the standard output.
If the option '--all' or '-a' is given, then all available commands are
printed on the standard output.
If a perf command is named, a manual page for that command is brought
up. The 'man' program is used by default for this purpose, but this
can be overridden by other options or configuration variables.
Note that `perf --help ...` is identical to `perf help ...` because the
former is internally converted into the latter.
OPTIONS
-------
-a::
--all::
Prints all the available commands on the standard output. This
option supersedes any other option.
PERF
----
Part of the linkperf:perf[1] suite

View file

@ -0,0 +1,25 @@
perf-list(1)
============
NAME
----
perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
'perf list'
DESCRIPTION
-----------
This command displays the symbolic event types which can be selected in the
various perf commands with the -e option.
OPTIONS
-------
None
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1]

View file

@ -0,0 +1,42 @@
perf-record(1)
==============
NAME
----
perf-record - Run a command and record its profile into perf.data
SYNOPSIS
--------
[verse]
'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
DESCRIPTION
-----------
This command runs a command and gathers a performance counter profile
from it, into perf.data - without displaying anything.
This file can then be inspected later on, using 'perf report'.
OPTIONS
-------
<command>...::
Any command you can specify in a shell.
-e::
--event=::
Select the PMU event. Selection can be a symbolic event name
(use 'perf list' to list all events) or a raw PMU
event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
-a::
system-wide collection
-l::
scale counter values
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]

View file

@ -0,0 +1,26 @@
perf-report(1)
==============
NAME
----
perf-report - Read perf.data (created by perf record) and display the profile
SYNOPSIS
--------
[verse]
'perf report' [-i <file> | --input=file]
DESCRIPTION
-----------
This command displays the performance counter profile information recorded
via perf report.
OPTIONS
-------
-i::
--input=::
Input file name. (default: perf.data)
SEE ALSO
--------
linkperf:perf-stat[1]

View file

@ -0,0 +1,66 @@
perf-stat(1)
============
NAME
----
perf-stat - Run a command and gather performance counter statistics
SYNOPSIS
--------
[verse]
'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
DESCRIPTION
-----------
This command runs a command and gathers performance counter statistics
from it.
OPTIONS
-------
<command>...::
Any command you can specify in a shell.
-e::
--event=::
Select the PMU event. Selection can be a symbolic event name
(use 'perf list' to list all events) or a raw PMU
event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
-i::
--inherit::
child tasks inherit counters
-p::
--pid=<pid>::
stat events on existing pid
-a::
system-wide collection
-l::
scale counter values
EXAMPLES
--------
$ perf stat -- make -j
Performance counter stats for 'make -j':
8117.370256 task clock ticks # 11.281 CPU utilization factor
678 context switches # 0.000 M/sec
133 CPU migrations # 0.000 M/sec
235724 pagefaults # 0.029 M/sec
24821162526 CPU cycles # 3057.784 M/sec
18687303457 instructions # 2302.138 M/sec
172158895 cache references # 21.209 M/sec
27075259 cache misses # 3.335 M/sec
Wall-clock time elapsed: 719.554352 msecs
SEE ALSO
--------
linkperf:perf-top[1], linkperf:perf-list[1]

View file

@ -0,0 +1,39 @@
perf-top(1)
===========
NAME
----
perf-top - Run a command and profile it
SYNOPSIS
--------
[verse]
'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
DESCRIPTION
-----------
This command runs a command and gathers a performance counter profile
from it.
OPTIONS
-------
<command>...::
Any command you can specify in a shell.
-e::
--event=::
Select the PMU event. Selection can be a symbolic event name
(use 'perf list' to list all events) or a raw PMU
event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
-a::
system-wide collection
-l::
scale counter values
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]

View file

@ -0,0 +1,24 @@
perf(1)
=======
NAME
----
perf - Performance analysis tools for Linux
SYNOPSIS
--------
[verse]
'perf' [--version] [--help] COMMAND [ARGS]
DESCRIPTION
-----------
Performance counters for Linux are are a new kernel-based subsystem
that provide a framework for all things performance analysis. It
covers hardware level (CPU/PMU, Performance Monitoring Unit) features
and software features (software counters, tracepoints) as well.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1], linkperf:perf-report[1],
linkperf:perf-list[1]

929
tools/perf/Makefile Normal file
View file

@ -0,0 +1,929 @@
# The default target of this Makefile is...
all::
# Define V=1 to have a more verbose compile.
#
# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
# or vsnprintf() return -1 instead of number of characters which would
# have been written to the final string if enough space had been available.
#
# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds
# when attempting to read from an fopen'ed directory.
#
# Define NO_OPENSSL environment variable if you do not have OpenSSL.
# This also implies MOZILLA_SHA1.
#
# Define CURLDIR=/foo/bar if your curl header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
# Define EXPATDIR=/foo/bar if your expat header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent.
#
# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks
# d_type in struct dirent (latest Cygwin -- will be fixed soonish).
#
# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.)
# do not support the 'size specifiers' introduced by C99, namely ll, hh,
# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t).
# some C compilers supported these specifiers prior to C99 as an extension.
#
# Define NO_STRCASESTR if you don't have strcasestr.
#
# Define NO_MEMMEM if you don't have memmem.
#
# Define NO_STRTOUMAX if you don't have strtoumax in the C library.
# If your compiler also does not support long long or does not have
# strtoull, define NO_STRTOULL.
#
# Define NO_SETENV if you don't have setenv in the C library.
#
# Define NO_UNSETENV if you don't have unsetenv in the C library.
#
# Define NO_MKDTEMP if you don't have mkdtemp in the C library.
#
# Define NO_SYS_SELECT_H if you don't have sys/select.h.
#
# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link.
# Enable it on Windows. By default, symrefs are still used.
#
# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability
# tests. These tests take up a significant amount of the total test time
# but are not needed unless you plan to talk to SVN repos.
#
# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink
# installed in /sw, but don't want PERF to link against any libraries
# installed there. If defined you may specify your own (or Fink's)
# include directories and library directories by defining CFLAGS
# and LDFLAGS appropriately.
#
# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X,
# have DarwinPorts installed in /opt/local, but don't want PERF to
# link against any libraries installed there. If defined you may
# specify your own (or DarwinPort's) include directories and
# library directories by defining CFLAGS and LDFLAGS appropriately.
#
# Define PPC_SHA1 environment variable when running make to make use of
# a bundled SHA1 routine optimized for PowerPC.
#
# Define ARM_SHA1 environment variable when running make to make use of
# a bundled SHA1 routine optimized for ARM.
#
# Define MOZILLA_SHA1 environment variable when running make to make use of
# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast
# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default
# choice) has very fast version optimized for i586.
#
# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin).
#
# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
#
# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
# Patrick Mauritz).
#
# Define NO_MMAP if you want to avoid mmap.
#
# Define NO_PTHREADS if you do not have or do not want to use Pthreads.
#
# Define NO_PREAD if you have a problem with pread() system call (e.g.
# cygwin.dll before v1.5.22).
#
# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is
# generally faster on your platform than accessing the working directory.
#
# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support
# the executable mode bit, but doesn't really do so.
#
# Define NO_IPV6 if you lack IPv6 support and getaddrinfo().
#
# Define NO_SOCKADDR_STORAGE if your platform does not have struct
# sockaddr_storage.
#
# Define NO_ICONV if your libc does not properly support iconv.
#
# Define OLD_ICONV if your library has an old iconv(), where the second
# (input buffer pointer) parameter is declared with type (const char **).
#
# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound.
#
# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib"
# that tells runtime paths to dynamic libraries;
# "-Wl,-rpath=/path/lib" is used instead.
#
# Define USE_NSEC below if you want perf to care about sub-second file mtimes
# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
# randomly break unless your underlying filesystem supports those sub-second
# times (my ext3 doesn't).
#
# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of
# "st_ctim"
#
# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec"
# available. This automatically turns USE_NSEC off.
#
# Define USE_STDEV below if you want perf to care about the underlying device
# change being considered an inode change from the update-index perspective.
#
# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
# field that counts the on-disk footprint in 512-byte blocks.
#
# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
#
# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
#
# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's
# MakeMaker (e.g. using ActiveState under Cygwin).
#
# Define NO_PERL if you do not want Perl scripts or libraries at all.
#
# Define INTERNAL_QSORT to use Git's implementation of qsort(), which
# is a simplified version of the merge sort used in glibc. This is
# recommended if Git triggers O(n^2) behavior in your platform's qsort().
#
# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
# your external grep (e.g., if your system lacks grep, if its grep is
# broken, or spawning external process is slower than built-in grep perf has).
PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
@$(SHELL_PATH) util/PERF-VERSION-GEN
-include PERF-VERSION-FILE
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
# CFLAGS and LDFLAGS are for the users to override from the command line.
CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6
LDFLAGS = -lpthread -lrt -lelf
ALL_CFLAGS = $(CFLAGS)
ALL_LDFLAGS = $(LDFLAGS)
STRIP ?= strip
# Among the variables below, these:
# perfexecdir
# template_dir
# mandir
# infodir
# htmldir
# ETC_PERFCONFIG (but not sysconfdir)
# can be specified as a relative path some/where/else;
# this is interpreted as relative to $(prefix) and "perf" at
# runtime figures out where they are based on the path to the executable.
# This can help installing the suite in a relocatable way.
prefix = $(HOME)
bindir_relative = bin
bindir = $(prefix)/$(bindir_relative)
mandir = share/man
infodir = share/info
perfexecdir = libexec/perf-core
sharedir = $(prefix)/share
template_dir = share/perf-core/templates
htmldir = share/doc/perf-doc
ifeq ($(prefix),/usr)
sysconfdir = /etc
ETC_PERFCONFIG = $(sysconfdir)/perfconfig
else
sysconfdir = $(prefix)/etc
ETC_PERFCONFIG = etc/perfconfig
endif
lib = lib
# DESTDIR=
export prefix bindir sharedir sysconfdir
CC = gcc
AR = ar
RM = rm -f
TAR = tar
FIND = find
INSTALL = install
RPMBUILD = rpmbuild
PTHREAD_LIBS = -lpthread
# sparse is architecture-neutral, which means that we need to tell it
# explicitly what architecture to check for. Fix this up for yours..
SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
### --- END CONFIGURATION SECTION ---
# Those must not be GNU-specific; they are shared with perl/ which may
# be built by a different compiler. (Note that this is an artifact now
# but it still might be nice to keep that distinction.)
BASIC_CFLAGS =
BASIC_LDFLAGS =
# Guard against environment variables
BUILTIN_OBJS =
BUILT_INS =
COMPAT_CFLAGS =
COMPAT_OBJS =
LIB_H =
LIB_OBJS =
SCRIPT_PERL =
SCRIPT_SH =
TEST_PROGRAMS =
#
# No scripts right now:
#
# SCRIPT_SH += perf-am.sh
#
# No Perl scripts right now:
#
# SCRIPT_PERL += perf-add--interactive.perl
SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
$(patsubst %.perl,%,$(SCRIPT_PERL))
# Empty...
EXTRA_PROGRAMS =
# ... and all the rest that could be moved out of bindir to perfexecdir
PROGRAMS += $(EXTRA_PROGRAMS)
#
# Single 'perf' binary right now:
#
PROGRAMS += perf
# List built-in command $C whose implementation cmd_$C() is not in
# builtin-$C.o but is linked in as part of some other command.
#
# None right now:
#
# BUILT_INS += perf-init $X
# what 'all' will build and 'install' will install, in perfexecdir
ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
# what 'all' will build but not install in perfexecdir
OTHER_PROGRAMS = perf$X
# Set paths to tools early so that they can be used for version tests.
ifndef SHELL_PATH
SHELL_PATH = /bin/sh
endif
ifndef PERL_PATH
PERL_PATH = /usr/bin/perl
endif
export PERL_PATH
LIB_FILE=libperf.a
LIB_H += ../../include/linux/perf_counter.h
LIB_H += perf.h
LIB_H += util/list.h
LIB_H += util/rbtree.h
LIB_H += util/levenshtein.h
LIB_H += util/parse-options.h
LIB_H += util/parse-events.h
LIB_H += util/quote.h
LIB_H += util/util.h
LIB_H += util/help.h
LIB_H += util/strbuf.h
LIB_H += util/string.h
LIB_H += util/run-command.h
LIB_H += util/sigchain.h
LIB_H += util/symbol.h
LIB_H += util/color.h
LIB_OBJS += util/abspath.o
LIB_OBJS += util/alias.o
LIB_OBJS += util/config.o
LIB_OBJS += util/ctype.o
LIB_OBJS += util/environment.o
LIB_OBJS += util/exec_cmd.o
LIB_OBJS += util/help.o
LIB_OBJS += util/levenshtein.o
LIB_OBJS += util/parse-options.o
LIB_OBJS += util/parse-events.o
LIB_OBJS += util/path.o
LIB_OBJS += util/rbtree.o
LIB_OBJS += util/run-command.o
LIB_OBJS += util/quote.o
LIB_OBJS += util/strbuf.o
LIB_OBJS += util/string.o
LIB_OBJS += util/usage.o
LIB_OBJS += util/wrapper.o
LIB_OBJS += util/sigchain.o
LIB_OBJS += util/symbol.o
LIB_OBJS += util/color.o
LIB_OBJS += util/pager.o
BUILTIN_OBJS += builtin-annotate.o
BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-list.o
BUILTIN_OBJS += builtin-record.o
BUILTIN_OBJS += builtin-report.o
BUILTIN_OBJS += builtin-stat.o
BUILTIN_OBJS += builtin-top.o
PERFLIBS = $(LIB_FILE)
EXTLIBS =
#
# Platform specific tweaks
#
# We choose to avoid "if .. else if .. else .. endif endif"
# because maintaining the nesting to match is a pain. If
# we had "elif" things would have been much nicer...
-include config.mak.autogen
-include config.mak
ifeq ($(uname_S),Darwin)
ifndef NO_FINK
ifeq ($(shell test -d /sw/lib && echo y),y)
BASIC_CFLAGS += -I/sw/include
BASIC_LDFLAGS += -L/sw/lib
endif
endif
ifndef NO_DARWIN_PORTS
ifeq ($(shell test -d /opt/local/lib && echo y),y)
BASIC_CFLAGS += -I/opt/local/include
BASIC_LDFLAGS += -L/opt/local/lib
endif
endif
PTHREAD_LIBS =
endif
ifndef CC_LD_DYNPATH
ifdef NO_R_TO_GCC_LINKER
# Some gcc does not accept and pass -R to the linker to specify
# the runtime dynamic library path.
CC_LD_DYNPATH = -Wl,-rpath,
else
CC_LD_DYNPATH = -R
endif
endif
ifdef ZLIB_PATH
BASIC_CFLAGS += -I$(ZLIB_PATH)/include
EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib)
endif
EXTLIBS += -lz
ifdef NEEDS_SOCKET
EXTLIBS += -lsocket
endif
ifdef NEEDS_NSL
EXTLIBS += -lnsl
endif
ifdef NO_D_TYPE_IN_DIRENT
BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT
endif
ifdef NO_D_INO_IN_DIRENT
BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT
endif
ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT
endif
ifdef USE_NSEC
BASIC_CFLAGS += -DUSE_NSEC
endif
ifdef USE_ST_TIMESPEC
BASIC_CFLAGS += -DUSE_ST_TIMESPEC
endif
ifdef NO_NSEC
BASIC_CFLAGS += -DNO_NSEC
endif
ifdef NO_C99_FORMAT
BASIC_CFLAGS += -DNO_C99_FORMAT
endif
ifdef SNPRINTF_RETURNS_BOGUS
COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS
COMPAT_OBJS += compat/snprintf.o
endif
ifdef FREAD_READS_DIRECTORIES
COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
COMPAT_OBJS += compat/fopen.o
endif
ifdef NO_SYMLINK_HEAD
BASIC_CFLAGS += -DNO_SYMLINK_HEAD
endif
ifdef NO_STRCASESTR
COMPAT_CFLAGS += -DNO_STRCASESTR
COMPAT_OBJS += compat/strcasestr.o
endif
ifdef NO_STRTOUMAX
COMPAT_CFLAGS += -DNO_STRTOUMAX
COMPAT_OBJS += compat/strtoumax.o
endif
ifdef NO_STRTOULL
COMPAT_CFLAGS += -DNO_STRTOULL
endif
ifdef NO_SETENV
COMPAT_CFLAGS += -DNO_SETENV
COMPAT_OBJS += compat/setenv.o
endif
ifdef NO_MKDTEMP
COMPAT_CFLAGS += -DNO_MKDTEMP
COMPAT_OBJS += compat/mkdtemp.o
endif
ifdef NO_UNSETENV
COMPAT_CFLAGS += -DNO_UNSETENV
COMPAT_OBJS += compat/unsetenv.o
endif
ifdef NO_SYS_SELECT_H
BASIC_CFLAGS += -DNO_SYS_SELECT_H
endif
ifdef NO_MMAP
COMPAT_CFLAGS += -DNO_MMAP
COMPAT_OBJS += compat/mmap.o
else
ifdef USE_WIN32_MMAP
COMPAT_CFLAGS += -DUSE_WIN32_MMAP
COMPAT_OBJS += compat/win32mmap.o
endif
endif
ifdef NO_PREAD
COMPAT_CFLAGS += -DNO_PREAD
COMPAT_OBJS += compat/pread.o
endif
ifdef NO_FAST_WORKING_DIRECTORY
BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
endif
ifdef NO_TRUSTABLE_FILEMODE
BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE
endif
ifdef NO_IPV6
BASIC_CFLAGS += -DNO_IPV6
endif
ifdef NO_UINTMAX_T
BASIC_CFLAGS += -Duintmax_t=uint32_t
endif
ifdef NO_SOCKADDR_STORAGE
ifdef NO_IPV6
BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in
else
BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6
endif
endif
ifdef NO_INET_NTOP
LIB_OBJS += compat/inet_ntop.o
endif
ifdef NO_INET_PTON
LIB_OBJS += compat/inet_pton.o
endif
ifdef NO_ICONV
BASIC_CFLAGS += -DNO_ICONV
endif
ifdef OLD_ICONV
BASIC_CFLAGS += -DOLD_ICONV
endif
ifdef NO_DEFLATE_BOUND
BASIC_CFLAGS += -DNO_DEFLATE_BOUND
endif
ifdef PPC_SHA1
SHA1_HEADER = "ppc/sha1.h"
LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
else
ifdef ARM_SHA1
SHA1_HEADER = "arm/sha1.h"
LIB_OBJS += arm/sha1.o arm/sha1_arm.o
else
ifdef MOZILLA_SHA1
SHA1_HEADER = "mozilla-sha1/sha1.h"
LIB_OBJS += mozilla-sha1/sha1.o
else
SHA1_HEADER = <openssl/sha.h>
EXTLIBS += $(LIB_4_CRYPTO)
endif
endif
endif
ifdef NO_PERL_MAKEMAKER
export NO_PERL_MAKEMAKER
endif
ifdef NO_HSTRERROR
COMPAT_CFLAGS += -DNO_HSTRERROR
COMPAT_OBJS += compat/hstrerror.o
endif
ifdef NO_MEMMEM
COMPAT_CFLAGS += -DNO_MEMMEM
COMPAT_OBJS += compat/memmem.o
endif
ifdef INTERNAL_QSORT
COMPAT_CFLAGS += -DINTERNAL_QSORT
COMPAT_OBJS += compat/qsort.o
endif
ifdef RUNTIME_PREFIX
COMPAT_CFLAGS += -DRUNTIME_PREFIX
endif
ifdef DIR_HAS_BSD_GROUP_SEMANTICS
COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
endif
ifdef NO_EXTERNAL_GREP
BASIC_CFLAGS += -DNO_EXTERNAL_GREP
endif
ifeq ($(PERL_PATH),)
NO_PERL=NoThanks
endif
QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
QUIET_SUBDIR1 =
ifneq ($(findstring $(MAKEFLAGS),w),w)
PRINT_DIR = --no-print-directory
else # "make -w"
NO_SUBDIR = :
endif
ifneq ($(findstring $(MAKEFLAGS),s),s)
ifndef V
QUIET_CC = @echo ' ' CC $@;
QUIET_AR = @echo ' ' AR $@;
QUIET_LINK = @echo ' ' LINK $@;
QUIET_BUILT_IN = @echo ' ' BUILTIN $@;
QUIET_GEN = @echo ' ' GEN $@;
QUIET_SUBDIR0 = +@subdir=
QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
$(MAKE) $(PRINT_DIR) -C $$subdir
export V
export QUIET_GEN
export QUIET_BUILT_IN
endif
endif
ifdef ASCIIDOC8
export ASCIIDOC8
endif
# Shell quote (do not use $(call) to accommodate ancient setups);
SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
bindir_SQ = $(subst ','\'',$(bindir))
bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
mandir_SQ = $(subst ','\'',$(mandir))
infodir_SQ = $(subst ','\'',$(infodir))
perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
template_dir_SQ = $(subst ','\'',$(template_dir))
htmldir_SQ = $(subst ','\'',$(htmldir))
prefix_SQ = $(subst ','\'',$(prefix))
SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
LIBS = $(PERFLIBS) $(EXTLIBS)
BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
$(COMPAT_CFLAGS)
LIB_OBJS += $(COMPAT_OBJS)
ALL_CFLAGS += $(BASIC_CFLAGS)
ALL_LDFLAGS += $(BASIC_LDFLAGS)
export TAR INSTALL DESTDIR SHELL_PATH
### Build rules
SHELL = $(SHELL_PATH)
all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS
ifneq (,$X)
$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
endif
all::
please_set_SHELL_PATH_to_a_more_modern_shell:
@$$(:)
shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
strip: $(PROGRAMS) perf$X
$(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X
perf.o: perf.c common-cmds.h PERF-CFLAGS
$(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
$(ALL_CFLAGS) -c $(filter %.c,$^)
perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \
$(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)
builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS
$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
'-DPERF_MAN_PATH="$(mandir_SQ)"' \
'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
$(BUILT_INS): perf$X
$(QUIET_BUILT_IN)$(RM) $@ && \
ln perf$X $@ 2>/dev/null || \
ln -s perf$X $@ 2>/dev/null || \
cp perf$X $@
common-cmds.h: util/generate-cmdlist.sh command-list.txt
common-cmds.h: $(wildcard Documentation/perf-*.txt)
$(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@
$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
$(QUIET_GEN)$(RM) $@ $@+ && \
sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
-e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
-e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
-e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
-e 's/@@NO_CURL@@/$(NO_CURL)/g' \
$@.sh >$@+ && \
chmod +x $@+ && \
mv $@+ $@
configure: configure.ac
$(QUIET_GEN)$(RM) $@ $<+ && \
sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
$< > $<+ && \
autoconf -o $@ $<+ && \
$(RM) $<+
# These can record PERF_VERSION
perf.o perf.spec \
$(patsubst %.sh,%,$(SCRIPT_SH)) \
$(patsubst %.perl,%,$(SCRIPT_PERL)) \
: PERF-VERSION-FILE
%.o: %.c PERF-CFLAGS
$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
%.s: %.c PERF-CFLAGS
$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
%.o: %.S
$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS
$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
'-DBINDIR="$(bindir_relative_SQ)"' \
'-DPREFIX="$(prefix_SQ)"' \
$<
builtin-init-db.o: builtin-init-db.c PERF-CFLAGS
$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
util/config.o: util/config.c PERF-CFLAGS
$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
perf-%$X: %.o $(PERFLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
builtin-revert.o wt-status.o: wt-status.h
$(LIB_FILE): $(LIB_OBJS)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
doc:
$(MAKE) -C Documentation all
man:
$(MAKE) -C Documentation man
html:
$(MAKE) -C Documentation html
info:
$(MAKE) -C Documentation info
pdf:
$(MAKE) -C Documentation pdf
TAGS:
$(RM) TAGS
$(FIND) . -name '*.[hcS]' -print | xargs etags -a
tags:
$(RM) tags
$(FIND) . -name '*.[hcS]' -print | xargs ctags -a
cscope:
$(RM) cscope*
$(FIND) . -name '*.[hcS]' -print | xargs cscope -b
### Detect prefix changes
TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\
$(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
PERF-CFLAGS: .FORCE-PERF-CFLAGS
@FLAGS='$(TRACK_CFLAGS)'; \
if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \
echo 1>&2 " * new build flags or prefix"; \
echo "$$FLAGS" >PERF-CFLAGS; \
fi
# We need to apply sq twice, once to protect from the shell
# that runs PERF-BUILD-OPTIONS, and then again to protect it
# and the first level quoting from the shell that runs "echo".
PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
@echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
### Testing rules
#
# None right now:
#
# TEST_PROGRAMS += test-something$X
all:: $(TEST_PROGRAMS)
# GNU make supports exporting all variables by "export" without parameters.
# However, the environment gets quite big, and some programs have problems
# with that.
export NO_SVN_TESTS
check: common-cmds.h
if sparse; \
then \
for i in *.c */*.c; \
do \
sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
done; \
else \
echo 2>&1 "Did you mean 'make test'?"; \
exit 1; \
fi
remove-dashes:
./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS)
### Installation rules
ifneq ($(filter /%,$(firstword $(template_dir))),)
template_instdir = $(template_dir)
else
template_instdir = $(prefix)/$(template_dir)
endif
export template_instdir
ifneq ($(filter /%,$(firstword $(perfexecdir))),)
perfexec_instdir = $(perfexecdir)
else
perfexec_instdir = $(prefix)/$(perfexecdir)
endif
perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
export perfexec_instdir
install: all
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
ifdef BUILT_INS
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
ifneq (,$X)
$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
endif
endif
install-doc:
$(MAKE) -C Documentation install
install-man:
$(MAKE) -C Documentation install-man
install-html:
$(MAKE) -C Documentation install-html
install-info:
$(MAKE) -C Documentation install-info
install-pdf:
$(MAKE) -C Documentation install-pdf
quick-install-doc:
$(MAKE) -C Documentation quick-install
quick-install-man:
$(MAKE) -C Documentation quick-install-man
quick-install-html:
$(MAKE) -C Documentation quick-install-html
### Maintainer's dist rules
#
# None right now
#
#
# perf.spec: perf.spec.in
# sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
# mv $@+ $@
#
# PERF_TARNAME=perf-$(PERF_VERSION)
# dist: perf.spec perf-archive$(X) configure
# ./perf-archive --format=tar \
# --prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
# @mkdir -p $(PERF_TARNAME)
# @cp perf.spec configure $(PERF_TARNAME)
# @echo $(PERF_VERSION) > $(PERF_TARNAME)/version
# $(TAR) rf $(PERF_TARNAME).tar \
# $(PERF_TARNAME)/perf.spec \
# $(PERF_TARNAME)/configure \
# $(PERF_TARNAME)/version
# @$(RM) -r $(PERF_TARNAME)
# gzip -f -9 $(PERF_TARNAME).tar
#
# htmldocs = perf-htmldocs-$(PERF_VERSION)
# manpages = perf-manpages-$(PERF_VERSION)
# dist-doc:
# $(RM) -r .doc-tmp-dir
# mkdir .doc-tmp-dir
# $(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
# cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
# gzip -n -9 -f $(htmldocs).tar
# :
# $(RM) -r .doc-tmp-dir
# mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
# $(MAKE) -C Documentation DESTDIR=./ \
# man1dir=../.doc-tmp-dir/man1 \
# man5dir=../.doc-tmp-dir/man5 \
# man7dir=../.doc-tmp-dir/man7 \
# install
# cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
# gzip -n -9 -f $(manpages).tar
# $(RM) -r .doc-tmp-dir
#
# rpm: dist
# $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
### Cleaning rules
distclean: clean
# $(RM) configure
clean:
$(RM) *.o */*.o $(LIB_FILE)
$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
$(RM) $(TEST_PROGRAMS)
$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
$(RM) -r autom4te.cache
$(RM) config.log config.mak.autogen config.mak.append config.status config.cache
$(RM) -r $(PERF_TARNAME) .doc-tmp-dir
$(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
$(MAKE) -C Documentation/ clean
$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS
.PHONY: all install clean strip
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
.PHONY: .FORCE-PERF-BUILD-OPTIONS
### Make sure built-ins do not have dups and listed in perf.c
#
check-builtins::
./check-builtins.sh
### Test suite coverage testing
#
# None right now
#
# .PHONY: coverage coverage-clean coverage-build coverage-report
#
# coverage:
# $(MAKE) coverage-build
# $(MAKE) coverage-report
#
# coverage-clean:
# rm -f *.gcda *.gcno
#
# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
# COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov
#
# coverage-build: coverage-clean
# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
# -j1 test
#
# coverage-report:
# gcov -b *.c */*.c
# grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
# | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
# | tee coverage-untested-functions

File diff suppressed because it is too large Load diff

461
tools/perf/builtin-help.c Normal file
View file

@ -0,0 +1,461 @@
/*
* builtin-help.c
*
* Builtin help command
*/
#include "util/cache.h"
#include "builtin.h"
#include "util/exec_cmd.h"
#include "common-cmds.h"
#include "util/parse-options.h"
#include "util/run-command.h"
#include "util/help.h"
static struct man_viewer_list {
struct man_viewer_list *next;
char name[FLEX_ARRAY];
} *man_viewer_list;
static struct man_viewer_info_list {
struct man_viewer_info_list *next;
const char *info;
char name[FLEX_ARRAY];
} *man_viewer_info_list;
enum help_format {
HELP_FORMAT_MAN,
HELP_FORMAT_INFO,
HELP_FORMAT_WEB,
};
static int show_all = 0;
static enum help_format help_format = HELP_FORMAT_MAN;
static struct option builtin_help_options[] = {
OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN),
OPT_SET_INT('w', "web", &help_format, "show manual in web browser",
HELP_FORMAT_WEB),
OPT_SET_INT('i', "info", &help_format, "show info page",
HELP_FORMAT_INFO),
OPT_END(),
};
static const char * const builtin_help_usage[] = {
"perf help [--all] [--man|--web|--info] [command]",
NULL
};
static enum help_format parse_help_format(const char *format)
{
if (!strcmp(format, "man"))
return HELP_FORMAT_MAN;
if (!strcmp(format, "info"))
return HELP_FORMAT_INFO;
if (!strcmp(format, "web") || !strcmp(format, "html"))
return HELP_FORMAT_WEB;
die("unrecognized help format '%s'", format);
}
static const char *get_man_viewer_info(const char *name)
{
struct man_viewer_info_list *viewer;
for (viewer = man_viewer_info_list; viewer; viewer = viewer->next)
{
if (!strcasecmp(name, viewer->name))
return viewer->info;
}
return NULL;
}
static int check_emacsclient_version(void)
{
struct strbuf buffer = STRBUF_INIT;
struct child_process ec_process;
const char *argv_ec[] = { "emacsclient", "--version", NULL };
int version;
/* emacsclient prints its version number on stderr */
memset(&ec_process, 0, sizeof(ec_process));
ec_process.argv = argv_ec;
ec_process.err = -1;
ec_process.stdout_to_stderr = 1;
if (start_command(&ec_process)) {
fprintf(stderr, "Failed to start emacsclient.\n");
return -1;
}
strbuf_read(&buffer, ec_process.err, 20);
close(ec_process.err);
/*
* Don't bother checking return value, because "emacsclient --version"
* seems to always exits with code 1.
*/
finish_command(&ec_process);
if (prefixcmp(buffer.buf, "emacsclient")) {
fprintf(stderr, "Failed to parse emacsclient version.\n");
strbuf_release(&buffer);
return -1;
}
strbuf_remove(&buffer, 0, strlen("emacsclient"));
version = atoi(buffer.buf);
if (version < 22) {
fprintf(stderr,
"emacsclient version '%d' too old (< 22).\n",
version);
strbuf_release(&buffer);
return -1;
}
strbuf_release(&buffer);
return 0;
}
static void exec_woman_emacs(const char* path, const char *page)
{
if (!check_emacsclient_version()) {
/* This works only with emacsclient version >= 22. */
struct strbuf man_page = STRBUF_INIT;
if (!path)
path = "emacsclient";
strbuf_addf(&man_page, "(woman \"%s\")", page);
execlp(path, "emacsclient", "-e", man_page.buf, NULL);
warning("failed to exec '%s': %s", path, strerror(errno));
}
}
static void exec_man_konqueror(const char* path, const char *page)
{
const char *display = getenv("DISPLAY");
if (display && *display) {
struct strbuf man_page = STRBUF_INIT;
const char *filename = "kfmclient";
/* It's simpler to launch konqueror using kfmclient. */
if (path) {
const char *file = strrchr(path, '/');
if (file && !strcmp(file + 1, "konqueror")) {
char *new = strdup(path);
char *dest = strrchr(new, '/');
/* strlen("konqueror") == strlen("kfmclient") */
strcpy(dest + 1, "kfmclient");
path = new;
}
if (file)
filename = file;
} else
path = "kfmclient";
strbuf_addf(&man_page, "man:%s(1)", page);
execlp(path, filename, "newTab", man_page.buf, NULL);
warning("failed to exec '%s': %s", path, strerror(errno));
}
}
static void exec_man_man(const char* path, const char *page)
{
if (!path)
path = "man";
execlp(path, "man", page, NULL);
warning("failed to exec '%s': %s", path, strerror(errno));
}
static void exec_man_cmd(const char *cmd, const char *page)
{
struct strbuf shell_cmd = STRBUF_INIT;
strbuf_addf(&shell_cmd, "%s %s", cmd, page);
execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL);
warning("failed to exec '%s': %s", cmd, strerror(errno));
}
static void add_man_viewer(const char *name)
{
struct man_viewer_list **p = &man_viewer_list;
size_t len = strlen(name);
while (*p)
p = &((*p)->next);
*p = calloc(1, (sizeof(**p) + len + 1));
strncpy((*p)->name, name, len);
}
static int supported_man_viewer(const char *name, size_t len)
{
return (!strncasecmp("man", name, len) ||
!strncasecmp("woman", name, len) ||
!strncasecmp("konqueror", name, len));
}
static void do_add_man_viewer_info(const char *name,
size_t len,
const char *value)
{
struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1);
strncpy(new->name, name, len);
new->info = strdup(value);
new->next = man_viewer_info_list;
man_viewer_info_list = new;
}
static int add_man_viewer_path(const char *name,
size_t len,
const char *value)
{
if (supported_man_viewer(name, len))
do_add_man_viewer_info(name, len, value);
else
warning("'%s': path for unsupported man viewer.\n"
"Please consider using 'man.<tool>.cmd' instead.",
name);
return 0;
}
static int add_man_viewer_cmd(const char *name,
size_t len,
const char *value)
{
if (supported_man_viewer(name, len))
warning("'%s': cmd for supported man viewer.\n"
"Please consider using 'man.<tool>.path' instead.",
name);
else
do_add_man_viewer_info(name, len, value);
return 0;
}
static int add_man_viewer_info(const char *var, const char *value)
{
const char *name = var + 4;
const char *subkey = strrchr(name, '.');
if (!subkey)
return error("Config with no key for man viewer: %s", name);
if (!strcmp(subkey, ".path")) {
if (!value)
return config_error_nonbool(var);
return add_man_viewer_path(name, subkey - name, value);
}
if (!strcmp(subkey, ".cmd")) {
if (!value)
return config_error_nonbool(var);
return add_man_viewer_cmd(name, subkey - name, value);
}
warning("'%s': unsupported man viewer sub key.", subkey);
return 0;
}
static int perf_help_config(const char *var, const char *value, void *cb)
{
if (!strcmp(var, "help.format")) {
if (!value)
return config_error_nonbool(var);
help_format = parse_help_format(value);
return 0;
}
if (!strcmp(var, "man.viewer")) {
if (!value)
return config_error_nonbool(var);
add_man_viewer(value);
return 0;
}
if (!prefixcmp(var, "man."))
return add_man_viewer_info(var, value);
return perf_default_config(var, value, cb);
}
static struct cmdnames main_cmds, other_cmds;
void list_common_cmds_help(void)
{
int i, longest = 0;
for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
if (longest < strlen(common_cmds[i].name))
longest = strlen(common_cmds[i].name);
}
puts(" The most commonly used perf commands are:");
for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
printf(" %s ", common_cmds[i].name);
mput_char(' ', longest - strlen(common_cmds[i].name));
puts(common_cmds[i].help);
}
}
static int is_perf_command(const char *s)
{
return is_in_cmdlist(&main_cmds, s) ||
is_in_cmdlist(&other_cmds, s);
}
static const char *prepend(const char *prefix, const char *cmd)
{
size_t pre_len = strlen(prefix);
size_t cmd_len = strlen(cmd);
char *p = malloc(pre_len + cmd_len + 1);
memcpy(p, prefix, pre_len);
strcpy(p + pre_len, cmd);
return p;
}
static const char *cmd_to_page(const char *perf_cmd)
{
if (!perf_cmd)
return "perf";
else if (!prefixcmp(perf_cmd, "perf"))
return perf_cmd;
else if (is_perf_command(perf_cmd))
return prepend("perf-", perf_cmd);
else
return prepend("perf-", perf_cmd);
}
static void setup_man_path(void)
{
struct strbuf new_path = STRBUF_INIT;
const char *old_path = getenv("MANPATH");
/* We should always put ':' after our path. If there is no
* old_path, the ':' at the end will let 'man' to try
* system-wide paths after ours to find the manual page. If
* there is old_path, we need ':' as delimiter. */
strbuf_addstr(&new_path, system_path(PERF_MAN_PATH));
strbuf_addch(&new_path, ':');
if (old_path)
strbuf_addstr(&new_path, old_path);
setenv("MANPATH", new_path.buf, 1);
strbuf_release(&new_path);
}
static void exec_viewer(const char *name, const char *page)
{
const char *info = get_man_viewer_info(name);
if (!strcasecmp(name, "man"))
exec_man_man(info, page);
else if (!strcasecmp(name, "woman"))
exec_woman_emacs(info, page);
else if (!strcasecmp(name, "konqueror"))
exec_man_konqueror(info, page);
else if (info)
exec_man_cmd(info, page);
else
warning("'%s': unknown man viewer.", name);
}
static void show_man_page(const char *perf_cmd)
{
struct man_viewer_list *viewer;
const char *page = cmd_to_page(perf_cmd);
const char *fallback = getenv("PERF_MAN_VIEWER");
setup_man_path();
for (viewer = man_viewer_list; viewer; viewer = viewer->next)
{
exec_viewer(viewer->name, page); /* will return when unable */
}
if (fallback)
exec_viewer(fallback, page);
exec_viewer("man", page);
die("no man viewer handled the request");
}
static void show_info_page(const char *perf_cmd)
{
const char *page = cmd_to_page(perf_cmd);
setenv("INFOPATH", system_path(PERF_INFO_PATH), 1);
execlp("info", "info", "perfman", page, NULL);
}
static void get_html_page_path(struct strbuf *page_path, const char *page)
{
struct stat st;
const char *html_path = system_path(PERF_HTML_PATH);
/* Check that we have a perf documentation directory. */
if (stat(mkpath("%s/perf.html", html_path), &st)
|| !S_ISREG(st.st_mode))
die("'%s': not a documentation directory.", html_path);
strbuf_init(page_path, 0);
strbuf_addf(page_path, "%s/%s.html", html_path, page);
}
/*
* If open_html is not defined in a platform-specific way (see for
* example compat/mingw.h), we use the script web--browse to display
* HTML.
*/
#ifndef open_html
static void open_html(const char *path)
{
execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
}
#endif
static void show_html_page(const char *perf_cmd)
{
const char *page = cmd_to_page(perf_cmd);
struct strbuf page_path; /* it leaks but we exec bellow */
get_html_page_path(&page_path, page);
open_html(page_path.buf);
}
int cmd_help(int argc, const char **argv, const char *prefix)
{
const char *alias;
load_command_list("perf-", &main_cmds, &other_cmds);
perf_config(perf_help_config, NULL);
argc = parse_options(argc, argv, builtin_help_options,
builtin_help_usage, 0);
if (show_all) {
printf("\n usage: %s\n\n", perf_usage_string);
list_commands("perf commands", &main_cmds, &other_cmds);
printf(" %s\n\n", perf_more_info_string);
return 0;
}
if (!argv[0]) {
printf("\n usage: %s\n\n", perf_usage_string);
list_common_cmds_help();
printf("\n %s\n\n", perf_more_info_string);
return 0;
}
alias = alias_lookup(argv[0]);
if (alias && !is_perf_command(argv[0])) {
printf("`perf %s' is aliased to `%s'\n", argv[0], alias);
return 0;
}
switch (help_format) {
case HELP_FORMAT_MAN:
show_man_page(argv[0]);
break;
case HELP_FORMAT_INFO:
show_info_page(argv[0]);
break;
case HELP_FORMAT_WEB:
show_html_page(argv[0]);
break;
}
return 0;
}

20
tools/perf/builtin-list.c Normal file
View file

@ -0,0 +1,20 @@
/*
* builtin-list.c
*
* Builtin list command: list all event types
*
* Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
*/
#include "builtin.h"
#include "perf.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
int cmd_list(int argc, const char **argv, const char *prefix)
{
print_events();
return 0;
}

582
tools/perf/builtin-record.c Normal file
View file

@ -0,0 +1,582 @@
/*
* builtin-record.c
*
* Builtin record command: Record the profile of a workload
* (or a CPU, or a PID) into the perf.data output file - for
* later analysis via perf report.
*/
#include "builtin.h"
#include "perf.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/string.h"
#include <unistd.h>
#include <sched.h>
#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
static int fd[MAX_NR_CPUS][MAX_COUNTERS];
static long default_interval = 100000;
static int nr_cpus = 0;
static unsigned int page_size;
static unsigned int mmap_pages = 128;
static int freq = 0;
static int output;
static const char *output_name = "perf.data";
static int group = 0;
static unsigned int realtime_prio = 0;
static int system_wide = 0;
static pid_t target_pid = -1;
static int inherit = 1;
static int force = 0;
static int append_file = 0;
static int verbose = 0;
static long samples;
static struct timeval last_read;
static struct timeval this_read;
static __u64 bytes_written;
static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
static int nr_poll;
static int nr_cpu;
struct mmap_event {
struct perf_event_header header;
__u32 pid;
__u32 tid;
__u64 start;
__u64 len;
__u64 pgoff;
char filename[PATH_MAX];
};
struct comm_event {
struct perf_event_header header;
__u32 pid;
__u32 tid;
char comm[16];
};
struct mmap_data {
int counter;
void *base;
unsigned int mask;
unsigned int prev;
};
static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
static unsigned int mmap_read_head(struct mmap_data *md)
{
struct perf_counter_mmap_page *pc = md->base;
int head;
head = pc->data_head;
rmb();
return head;
}
static void mmap_read(struct mmap_data *md)
{
unsigned int head = mmap_read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
unsigned long size;
void *buf;
int diff;
gettimeofday(&this_read, NULL);
/*
* If we're further behind than half the buffer, there's a chance
* the writer will bite our tail and mess up the samples under us.
*
* If we somehow ended up ahead of the head, we got messed up.
*
* In either case, truncate and restart at head.
*/
diff = head - old;
if (diff > md->mask / 2 || diff < 0) {
struct timeval iv;
unsigned long msecs;
timersub(&this_read, &last_read, &iv);
msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
fprintf(stderr, "WARNING: failed to keep up with mmap data."
" Last read %lu msecs ago.\n", msecs);
/*
* head points to a known good entry, start there.
*/
old = head;
}
last_read = this_read;
if (old != head)
samples++;
size = head - old;
if ((old & md->mask) + size != (head & md->mask)) {
buf = &data[old & md->mask];
size = md->mask + 1 - (old & md->mask);
old += size;
while (size) {
int ret = write(output, buf, size);
if (ret < 0)
die("failed to write");
size -= ret;
buf += ret;
bytes_written += ret;
}
}
buf = &data[old & md->mask];
size = head - old;
old += size;
while (size) {
int ret = write(output, buf, size);
if (ret < 0)
die("failed to write");
size -= ret;
buf += ret;
bytes_written += ret;
}
md->prev = old;
}
static volatile int done = 0;
static volatile int signr = -1;
static void sig_handler(int sig)
{
done = 1;
signr = sig;
}
static void sig_atexit(void)
{
if (signr == -1)
return;
signal(signr, SIG_DFL);
kill(getpid(), signr);
}
static void pid_synthesize_comm_event(pid_t pid, int full)
{
struct comm_event comm_ev;
char filename[PATH_MAX];
char bf[BUFSIZ];
int fd, ret;
size_t size;
char *field, *sep;
DIR *tasks;
struct dirent dirent, *next;
snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
fd = open(filename, O_RDONLY);
if (fd < 0) {
fprintf(stderr, "couldn't open %s\n", filename);
exit(EXIT_FAILURE);
}
if (read(fd, bf, sizeof(bf)) < 0) {
fprintf(stderr, "couldn't read %s\n", filename);
exit(EXIT_FAILURE);
}
close(fd);
/* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
memset(&comm_ev, 0, sizeof(comm_ev));
field = strchr(bf, '(');
if (field == NULL)
goto out_failure;
sep = strchr(++field, ')');
if (sep == NULL)
goto out_failure;
size = sep - field;
memcpy(comm_ev.comm, field, size++);
comm_ev.pid = pid;
comm_ev.header.type = PERF_EVENT_COMM;
size = ALIGN(size, sizeof(__u64));
comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
if (!full) {
comm_ev.tid = pid;
ret = write(output, &comm_ev, comm_ev.header.size);
if (ret < 0) {
perror("failed to write");
exit(-1);
}
return;
}
snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
tasks = opendir(filename);
while (!readdir_r(tasks, &dirent, &next) && next) {
char *end;
pid = strtol(dirent.d_name, &end, 10);
if (*end)
continue;
comm_ev.tid = pid;
ret = write(output, &comm_ev, comm_ev.header.size);
if (ret < 0) {
perror("failed to write");
exit(-1);
}
}
closedir(tasks);
return;
out_failure:
fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
filename);
exit(EXIT_FAILURE);
}
static void pid_synthesize_mmap_samples(pid_t pid)
{
char filename[PATH_MAX];
FILE *fp;
snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "couldn't open %s\n", filename);
exit(EXIT_FAILURE);
}
while (1) {
char bf[BUFSIZ], *pbf = bf;
struct mmap_event mmap_ev = {
.header.type = PERF_EVENT_MMAP,
};
int n;
size_t size;
if (fgets(bf, sizeof(bf), fp) == NULL)
break;
/* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
n = hex2u64(pbf, &mmap_ev.start);
if (n < 0)
continue;
pbf += n + 1;
n = hex2u64(pbf, &mmap_ev.len);
if (n < 0)
continue;
pbf += n + 3;
if (*pbf == 'x') { /* vm_exec */
char *execname = strrchr(bf, ' ');
if (execname == NULL || execname[1] != '/')
continue;
execname += 1;
size = strlen(execname);
execname[size - 1] = '\0'; /* Remove \n */
memcpy(mmap_ev.filename, execname, size);
size = ALIGN(size, sizeof(__u64));
mmap_ev.len -= mmap_ev.start;
mmap_ev.header.size = (sizeof(mmap_ev) -
(sizeof(mmap_ev.filename) - size));
mmap_ev.pid = pid;
mmap_ev.tid = pid;
if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
perror("failed to write");
exit(-1);
}
}
}
fclose(fp);
}
static void synthesize_samples(void)
{
DIR *proc;
struct dirent dirent, *next;
proc = opendir("/proc");
while (!readdir_r(proc, &dirent, &next) && next) {
char *end;
pid_t pid;
pid = strtol(dirent.d_name, &end, 10);
if (*end) /* only interested in proper numerical dirents */
continue;
pid_synthesize_comm_event(pid, 1);
pid_synthesize_mmap_samples(pid);
}
closedir(proc);
}
static int group_fd;
static void create_counter(int counter, int cpu, pid_t pid)
{
struct perf_counter_attr *attr = attrs + counter;
int track = 1;
attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
if (freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD;
attr->freq = 1;
attr->sample_freq = freq;
}
attr->mmap = track;
attr->comm = track;
attr->inherit = (cpu < 0) && inherit;
attr->disabled = 1;
track = 0; /* only the first counter needs these */
try_again:
fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
if (fd[nr_cpu][counter] < 0) {
int err = errno;
if (err == EPERM)
die("Permission error - are you root?\n");
/*
* If it's cycles then fall back to hrtimer
* based cpu-clock-tick sw counter, which
* is always available even if no PMU support:
*/
if (attr->type == PERF_TYPE_HARDWARE
&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
if (verbose)
warning(" ... trying to fall back to cpu-clock-ticks\n");
attr->type = PERF_TYPE_SOFTWARE;
attr->config = PERF_COUNT_SW_CPU_CLOCK;
goto try_again;
}
printf("\n");
error("perfcounter syscall returned with %d (%s)\n",
fd[nr_cpu][counter], strerror(err));
die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
exit(-1);
}
assert(fd[nr_cpu][counter] >= 0);
fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
/*
* First counter acts as the group leader:
*/
if (group && group_fd == -1)
group_fd = fd[nr_cpu][counter];
event_array[nr_poll].fd = fd[nr_cpu][counter];
event_array[nr_poll].events = POLLIN;
nr_poll++;
mmap_array[nr_cpu][counter].counter = counter;
mmap_array[nr_cpu][counter].prev = 0;
mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
error("failed to mmap with %d (%s)\n", errno, strerror(errno));
exit(-1);
}
ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE);
}
static void open_counters(int cpu, pid_t pid)
{
int counter;
if (pid > 0) {
pid_synthesize_comm_event(pid, 0);
pid_synthesize_mmap_samples(pid);
}
group_fd = -1;
for (counter = 0; counter < nr_counters; counter++)
create_counter(counter, cpu, pid);
nr_cpu++;
}
static int __cmd_record(int argc, const char **argv)
{
int i, counter;
struct stat st;
pid_t pid;
int flags;
int ret;
page_size = sysconf(_SC_PAGE_SIZE);
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
assert(nr_cpus >= 0);
if (!stat(output_name, &st) && !force && !append_file) {
fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
output_name);
exit(-1);
}
flags = O_CREAT|O_RDWR;
if (append_file)
flags |= O_APPEND;
else
flags |= O_TRUNC;
output = open(output_name, flags, S_IRUSR|S_IWUSR);
if (output < 0) {
perror("failed to create output file");
exit(-1);
}
if (!system_wide) {
open_counters(-1, target_pid != -1 ? target_pid : getpid());
} else for (i = 0; i < nr_cpus; i++)
open_counters(i, target_pid);
atexit(sig_atexit);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
if (target_pid == -1 && argc) {
pid = fork();
if (pid < 0)
perror("failed to fork");
if (!pid) {
if (execvp(argv[0], (char **)argv)) {
perror(argv[0]);
exit(-1);
}
}
}
if (realtime_prio) {
struct sched_param param;
param.sched_priority = realtime_prio;
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
printf("Could not set realtime priority.\n");
exit(-1);
}
}
if (system_wide)
synthesize_samples();
while (!done) {
int hits = samples;
for (i = 0; i < nr_cpu; i++) {
for (counter = 0; counter < nr_counters; counter++)
mmap_read(&mmap_array[i][counter]);
}
if (hits == samples)
ret = poll(event_array, nr_poll, 100);
}
/*
* Approximate RIP event size: 24 bytes.
*/
fprintf(stderr,
"[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
(double)bytes_written / 1024.0 / 1024.0,
output_name,
bytes_written / 24);
return 0;
}
static const char * const record_usage[] = {
"perf record [<options>] [<command>]",
"perf record [<options>] -- <command> [<options>]",
NULL
};
static const struct option options[] = {
OPT_CALLBACK('e', "event", NULL, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_INTEGER('p', "pid", &target_pid,
"record events on existing pid"),
OPT_INTEGER('r', "realtime", &realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('A', "append", &append_file,
"append to the output file to do incremental profiling"),
OPT_BOOLEAN('f', "force", &force,
"overwrite existing data file"),
OPT_LONG('c', "count", &default_interval,
"event period to sample"),
OPT_STRING('o', "output", &output_name, "file",
"output file name"),
OPT_BOOLEAN('i', "inherit", &inherit,
"child tasks inherit counters"),
OPT_INTEGER('F', "freq", &freq,
"profile at this frequency"),
OPT_INTEGER('m', "mmap-pages", &mmap_pages,
"number of mmap data pages"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_END()
};
int cmd_record(int argc, const char **argv, const char *prefix)
{
int counter;
argc = parse_options(argc, argv, options, record_usage, 0);
if (!argc && target_pid == -1 && !system_wide)
usage_with_options(record_usage, options);
if (!nr_counters)
nr_counters = 1;
for (counter = 0; counter < nr_counters; counter++) {
if (attrs[counter].sample_period)
continue;
attrs[counter].sample_period = default_interval;
}
return __cmd_record(argc, argv);
}

1316
tools/perf/builtin-report.c Normal file

File diff suppressed because it is too large Load diff

367
tools/perf/builtin-stat.c Normal file
View file

@ -0,0 +1,367 @@
/*
* builtin-stat.c
*
* Builtin stat command: Give a precise performance counters summary
* overview about any workload, CPU or specific PID.
*
* Sample output:
$ perf stat ~/hackbench 10
Time: 0.104
Performance counter stats for '/home/mingo/hackbench':
1255.538611 task clock ticks # 10.143 CPU utilization factor
54011 context switches # 0.043 M/sec
385 CPU migrations # 0.000 M/sec
17755 pagefaults # 0.014 M/sec
3808323185 CPU cycles # 3033.219 M/sec
1575111190 instructions # 1254.530 M/sec
17367895 cache references # 13.833 M/sec
7674421 cache misses # 6.112 M/sec
Wall-clock time elapsed: 123.786620 msecs
*
* Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
*
* Improvements and fixes by:
*
* Arjan van de Ven <arjan@linux.intel.com>
* Yanmin Zhang <yanmin.zhang@intel.com>
* Wu Fengguang <fengguang.wu@intel.com>
* Mike Galbraith <efault@gmx.de>
* Paul Mackerras <paulus@samba.org>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
#include "perf.h"
#include "builtin.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include <sys/prctl.h>
static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
};
static int system_wide = 0;
static int inherit = 1;
static int verbose = 0;
static int fd[MAX_NR_CPUS][MAX_COUNTERS];
static int target_pid = -1;
static int nr_cpus = 0;
static unsigned int page_size;
static int scale = 1;
static const unsigned int default_count[] = {
1000000,
1000000,
10000,
10000,
1000000,
10000,
};
static __u64 event_res[MAX_COUNTERS][3];
static __u64 event_scaled[MAX_COUNTERS];
static __u64 runtime_nsecs;
static __u64 walltime_nsecs;
static __u64 runtime_cycles;
static void create_perf_stat_counter(int counter)
{
struct perf_counter_attr *attr = attrs + counter;
if (scale)
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
if (system_wide) {
int cpu;
for (cpu = 0; cpu < nr_cpus; cpu ++) {
fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
if (fd[cpu][counter] < 0 && verbose) {
printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno));
}
}
} else {
attr->inherit = inherit;
attr->disabled = 1;
fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
if (fd[0][counter] < 0 && verbose) {
printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno));
}
}
}
/*
* Does the counter have nsecs as a unit?
*/
static inline int nsec_counter(int counter)
{
if (attrs[counter].type != PERF_TYPE_SOFTWARE)
return 0;
if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK)
return 1;
if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
return 1;
return 0;
}
/*
* Read out the results of a single counter:
*/
static void read_counter(int counter)
{
__u64 *count, single_count[3];
ssize_t res;
int cpu, nv;
int scaled;
count = event_res[counter];
count[0] = count[1] = count[2] = 0;
nv = scale ? 3 : 1;
for (cpu = 0; cpu < nr_cpus; cpu ++) {
if (fd[cpu][counter] < 0)
continue;
res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
assert(res == nv * sizeof(__u64));
count[0] += single_count[0];
if (scale) {
count[1] += single_count[1];
count[2] += single_count[2];
}
}
scaled = 0;
if (scale) {
if (count[2] == 0) {
event_scaled[counter] = -1;
count[0] = 0;
return;
}
if (count[2] < count[1]) {
event_scaled[counter] = 1;
count[0] = (unsigned long long)
((double)count[0] * count[1] / count[2] + 0.5);
}
}
/*
* Save the full runtime - to allow normalization during printout:
*/
if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
runtime_nsecs = count[0];
if (attrs[counter].type == PERF_TYPE_HARDWARE &&
attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
runtime_cycles = count[0];
}
/*
* Print out the results of a single counter:
*/
static void print_counter(int counter)
{
__u64 *count;
int scaled;
count = event_res[counter];
scaled = event_scaled[counter];
if (scaled == -1) {
fprintf(stderr, " %14s %-20s\n",
"<not counted>", event_name(counter));
return;
}
if (nsec_counter(counter)) {
double msecs = (double)count[0] / 1000000;
fprintf(stderr, " %14.6f %-20s",
msecs, event_name(counter));
if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
if (walltime_nsecs)
fprintf(stderr, " # %11.3f CPU utilization factor",
(double)count[0] / (double)walltime_nsecs);
}
} else {
fprintf(stderr, " %14Ld %-20s",
count[0], event_name(counter));
if (runtime_nsecs)
fprintf(stderr, " # %11.3f M/sec",
(double)count[0]/runtime_nsecs*1000.0);
if (runtime_cycles &&
attrs[counter].type == PERF_TYPE_HARDWARE &&
attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
fprintf(stderr, " # %1.3f per cycle",
(double)count[0] / (double)runtime_cycles);
}
}
if (scaled)
fprintf(stderr, " (scaled from %.2f%%)",
(double) count[2] / count[1] * 100);
fprintf(stderr, "\n");
}
static int do_perf_stat(int argc, const char **argv)
{
unsigned long long t0, t1;
int counter;
int status;
int pid;
int i;
if (!system_wide)
nr_cpus = 1;
for (counter = 0; counter < nr_counters; counter++)
create_perf_stat_counter(counter);
/*
* Enable counters and exec the command:
*/
t0 = rdclock();
prctl(PR_TASK_PERF_COUNTERS_ENABLE);
if ((pid = fork()) < 0)
perror("failed to fork");
if (!pid) {
if (execvp(argv[0], (char **)argv)) {
perror(argv[0]);
exit(-1);
}
}
while (wait(&status) >= 0)
;
prctl(PR_TASK_PERF_COUNTERS_DISABLE);
t1 = rdclock();
walltime_nsecs = t1 - t0;
fflush(stdout);
fprintf(stderr, "\n");
fprintf(stderr, " Performance counter stats for \'%s", argv[0]);
for (i = 1; i < argc; i++)
fprintf(stderr, " %s", argv[i]);
fprintf(stderr, "\':\n");
fprintf(stderr, "\n");
for (counter = 0; counter < nr_counters; counter++)
read_counter(counter);
for (counter = 0; counter < nr_counters; counter++)
print_counter(counter);
fprintf(stderr, "\n");
fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
(double)(t1-t0)/1e6);
fprintf(stderr, "\n");
return 0;
}
static volatile int signr = -1;
static void skip_signal(int signo)
{
signr = signo;
}
static void sig_atexit(void)
{
if (signr == -1)
return;
signal(signr, SIG_DFL);
kill(getpid(), signr);
}
static const char * const stat_usage[] = {
"perf stat [<options>] <command>",
NULL
};
static const struct option options[] = {
OPT_CALLBACK('e', "event", NULL, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_BOOLEAN('i', "inherit", &inherit,
"child tasks inherit counters"),
OPT_INTEGER('p', "pid", &target_pid,
"stat events on existing pid"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('S', "scale", &scale,
"scale/normalize counters"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_END()
};
int cmd_stat(int argc, const char **argv, const char *prefix)
{
page_size = sysconf(_SC_PAGE_SIZE);
memcpy(attrs, default_attrs, sizeof(attrs));
argc = parse_options(argc, argv, options, stat_usage, 0);
if (!argc)
usage_with_options(stat_usage, options);
if (!nr_counters)
nr_counters = 8;
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
assert(nr_cpus >= 0);
/*
* We dont want to block the signals - that would cause
* child tasks to inherit that and Ctrl-C would not work.
* What we want is for Ctrl-C to work in the exec()-ed
* task, but being ignored by perf stat itself:
*/
atexit(sig_atexit);
signal(SIGINT, skip_signal);
signal(SIGALRM, skip_signal);
signal(SIGABRT, skip_signal);
return do_perf_stat(argc, argv);
}

736
tools/perf/builtin-top.c Normal file
View file

@ -0,0 +1,736 @@
/*
* builtin-top.c
*
* Builtin top command: Display a continuously updated profile of
* any workload, CPU or specific PID.
*
* Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
*
* Improvements and fixes by:
*
* Arjan van de Ven <arjan@linux.intel.com>
* Yanmin Zhang <yanmin.zhang@intel.com>
* Wu Fengguang <fengguang.wu@intel.com>
* Mike Galbraith <efault@gmx.de>
* Paul Mackerras <paulus@samba.org>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
#include "builtin.h"
#include "perf.h"
#include "util/symbol.h"
#include "util/color.h"
#include "util/util.h"
#include "util/rbtree.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <linux/unistd.h>
#include <linux/types.h>
static int fd[MAX_NR_CPUS][MAX_COUNTERS];
static int system_wide = 0;
static int default_interval = 100000;
static __u64 count_filter = 5;
static int print_entries = 15;
static int target_pid = -1;
static int profile_cpu = -1;
static int nr_cpus = 0;
static unsigned int realtime_prio = 0;
static int group = 0;
static unsigned int page_size;
static unsigned int mmap_pages = 16;
static int freq = 0;
static int verbose = 0;
static char *sym_filter;
static unsigned long filter_start;
static unsigned long filter_end;
static int delay_secs = 2;
static int zero;
static int dump_symtab;
/*
* Symbols
*/
static __u64 min_ip;
static __u64 max_ip = -1ll;
struct sym_entry {
struct rb_node rb_node;
struct list_head node;
unsigned long count[MAX_COUNTERS];
unsigned long snap_count;
double weight;
int skip;
};
struct sym_entry *sym_filter_entry;
struct dso *kernel_dso;
/*
* Symbols will be added here in record_ip and will get out
* after decayed.
*/
static LIST_HEAD(active_symbols);
static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
/*
* Ordering weight: count-1 * count-2 * ... / count-n
*/
static double sym_weight(const struct sym_entry *sym)
{
double weight = sym->snap_count;
int counter;
for (counter = 1; counter < nr_counters-1; counter++)
weight *= sym->count[counter];
weight /= (sym->count[counter] + 1);
return weight;
}
static long samples;
static long userspace_samples;
static const char CONSOLE_CLEAR[] = "";
static void __list_insert_active_sym(struct sym_entry *syme)
{
list_add(&syme->node, &active_symbols);
}
static void list_remove_active_sym(struct sym_entry *syme)
{
pthread_mutex_lock(&active_symbols_lock);
list_del_init(&syme->node);
pthread_mutex_unlock(&active_symbols_lock);
}
static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
{
struct rb_node **p = &tree->rb_node;
struct rb_node *parent = NULL;
struct sym_entry *iter;
while (*p != NULL) {
parent = *p;
iter = rb_entry(parent, struct sym_entry, rb_node);
if (se->weight > iter->weight)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
}
rb_link_node(&se->rb_node, parent, p);
rb_insert_color(&se->rb_node, tree);
}
static void print_sym_table(void)
{
int printed = 0, j;
int counter;
float samples_per_sec = samples/delay_secs;
float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
float sum_ksamples = 0.0;
struct sym_entry *syme, *n;
struct rb_root tmp = RB_ROOT;
struct rb_node *nd;
samples = userspace_samples = 0;
/* Sort the active symbols */
pthread_mutex_lock(&active_symbols_lock);
syme = list_entry(active_symbols.next, struct sym_entry, node);
pthread_mutex_unlock(&active_symbols_lock);
list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
syme->snap_count = syme->count[0];
if (syme->snap_count != 0) {
syme->weight = sym_weight(syme);
rb_insert_active_sym(&tmp, syme);
sum_ksamples += syme->snap_count;
for (j = 0; j < nr_counters; j++)
syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
} else
list_remove_active_sym(syme);
}
puts(CONSOLE_CLEAR);
printf(
"------------------------------------------------------------------------------\n");
printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [",
samples_per_sec,
100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
if (nr_counters == 1) {
printf("%Ld", attrs[0].sample_period);
if (freq)
printf("Hz ");
else
printf(" ");
}
for (counter = 0; counter < nr_counters; counter++) {
if (counter)
printf("/");
printf("%s", event_name(counter));
}
printf( "], ");
if (target_pid != -1)
printf(" (target_pid: %d", target_pid);
else
printf(" (all");
if (profile_cpu != -1)
printf(", cpu: %d)\n", profile_cpu);
else {
if (target_pid != -1)
printf(")\n");
else
printf(", %d CPUs)\n", nr_cpus);
}
printf("------------------------------------------------------------------------------\n\n");
if (nr_counters == 1)
printf(" samples pcnt");
else
printf(" weight samples pcnt");
printf(" RIP kernel function\n"
" ______ _______ _____ ________________ _______________\n\n"
);
for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
struct symbol *sym = (struct symbol *)(syme + 1);
char *color = PERF_COLOR_NORMAL;
double pcnt;
if (++printed > print_entries || syme->snap_count < count_filter)
continue;
pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
sum_ksamples));
/*
* We color high-overhead entries in red, mid-overhead
* entries in green - and keep the low overhead places
* normal:
*/
if (pcnt >= 5.0) {
color = PERF_COLOR_RED;
} else {
if (pcnt >= 0.5)
color = PERF_COLOR_GREEN;
}
if (nr_counters == 1)
printf("%20.2f - ", syme->weight);
else
printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
color_fprintf(stdout, color, "%4.1f%%", pcnt);
printf(" - %016llx : %s\n", sym->start, sym->name);
}
}
static void *display_thread(void *arg)
{
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
int delay_msecs = delay_secs * 1000;
printf("PerfTop refresh period: %d seconds\n", delay_secs);
do {
print_sym_table();
} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
printf("key pressed - exiting.\n");
exit(0);
return NULL;
}
static int symbol_filter(struct dso *self, struct symbol *sym)
{
static int filter_match;
struct sym_entry *syme;
const char *name = sym->name;
if (!strcmp(name, "_text") ||
!strcmp(name, "_etext") ||
!strcmp(name, "_sinittext") ||
!strncmp("init_module", name, 11) ||
!strncmp("cleanup_module", name, 14) ||
strstr(name, "_text_start") ||
strstr(name, "_text_end"))
return 1;
syme = dso__sym_priv(self, sym);
/* Tag samples to be skipped. */
if (!strcmp("default_idle", name) ||
!strcmp("cpu_idle", name) ||
!strcmp("enter_idle", name) ||
!strcmp("exit_idle", name) ||
!strcmp("mwait_idle", name))
syme->skip = 1;
if (filter_match == 1) {
filter_end = sym->start;
filter_match = -1;
if (filter_end - filter_start > 10000) {
fprintf(stderr,
"hm, too large filter symbol <%s> - skipping.\n",
sym_filter);
fprintf(stderr, "symbol filter start: %016lx\n",
filter_start);
fprintf(stderr, " end: %016lx\n",
filter_end);
filter_end = filter_start = 0;
sym_filter = NULL;
sleep(1);
}
}
if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
filter_match = 1;
filter_start = sym->start;
}
return 0;
}
static int parse_symbols(void)
{
struct rb_node *node;
struct symbol *sym;
kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry));
if (kernel_dso == NULL)
return -1;
if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) != 0)
goto out_delete_dso;
node = rb_first(&kernel_dso->syms);
sym = rb_entry(node, struct symbol, rb_node);
min_ip = sym->start;
node = rb_last(&kernel_dso->syms);
sym = rb_entry(node, struct symbol, rb_node);
max_ip = sym->end;
if (dump_symtab)
dso__fprintf(kernel_dso, stderr);
return 0;
out_delete_dso:
dso__delete(kernel_dso);
kernel_dso = NULL;
return -1;
}
#define TRACE_COUNT 3
/*
* Binary search in the histogram table and record the hit:
*/
static void record_ip(__u64 ip, int counter)
{
struct symbol *sym = dso__find_symbol(kernel_dso, ip);
if (sym != NULL) {
struct sym_entry *syme = dso__sym_priv(kernel_dso, sym);
if (!syme->skip) {
syme->count[counter]++;
pthread_mutex_lock(&active_symbols_lock);
if (list_empty(&syme->node) || !syme->node.next)
__list_insert_active_sym(syme);
pthread_mutex_unlock(&active_symbols_lock);
return;
}
}
samples--;
}
static void process_event(__u64 ip, int counter)
{
samples++;
if (ip < min_ip || ip > max_ip) {
userspace_samples++;
return;
}
record_ip(ip, counter);
}
struct mmap_data {
int counter;
void *base;
unsigned int mask;
unsigned int prev;
};
static unsigned int mmap_read_head(struct mmap_data *md)
{
struct perf_counter_mmap_page *pc = md->base;
int head;
head = pc->data_head;
rmb();
return head;
}
struct timeval last_read, this_read;
static void mmap_read_counter(struct mmap_data *md)
{
unsigned int head = mmap_read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
int diff;
gettimeofday(&this_read, NULL);
/*
* If we're further behind than half the buffer, there's a chance
* the writer will bite our tail and mess up the samples under us.
*
* If we somehow ended up ahead of the head, we got messed up.
*
* In either case, truncate and restart at head.
*/
diff = head - old;
if (diff > md->mask / 2 || diff < 0) {
struct timeval iv;
unsigned long msecs;
timersub(&this_read, &last_read, &iv);
msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
fprintf(stderr, "WARNING: failed to keep up with mmap data."
" Last read %lu msecs ago.\n", msecs);
/*
* head points to a known good entry, start there.
*/
old = head;
}
last_read = this_read;
for (; old != head;) {
struct ip_event {
struct perf_event_header header;
__u64 ip;
__u32 pid, target_pid;
};
struct mmap_event {
struct perf_event_header header;
__u32 pid, target_pid;
__u64 start;
__u64 len;
__u64 pgoff;
char filename[PATH_MAX];
};
typedef union event_union {
struct perf_event_header header;
struct ip_event ip;
struct mmap_event mmap;
} event_t;
event_t *event = (event_t *)&data[old & md->mask];
event_t event_copy;
size_t size = event->header.size;
/*
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
*/
if ((old & md->mask) + size != ((old + size) & md->mask)) {
unsigned int offset = old;
unsigned int len = min(sizeof(*event), size), cpy;
void *dst = &event_copy;
do {
cpy = min(md->mask + 1 - (offset & md->mask), len);
memcpy(dst, &data[offset & md->mask], cpy);
offset += cpy;
dst += cpy;
len -= cpy;
} while (len);
event = &event_copy;
}
old += size;
if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
if (event->header.type & PERF_SAMPLE_IP)
process_event(event->ip.ip, md->counter);
}
}
md->prev = old;
}
static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
static void mmap_read(void)
{
int i, counter;
for (i = 0; i < nr_cpus; i++) {
for (counter = 0; counter < nr_counters; counter++)
mmap_read_counter(&mmap_array[i][counter]);
}
}
int nr_poll;
int group_fd;
static void start_counter(int i, int counter)
{
struct perf_counter_attr *attr;
unsigned int cpu;
cpu = profile_cpu;
if (target_pid == -1 && profile_cpu == -1)
cpu = i;
attr = attrs + counter;
attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
attr->freq = freq;
try_again:
fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
if (fd[i][counter] < 0) {
int err = errno;
if (err == EPERM)
die("No permission - are you root?\n");
/*
* If it's cycles then fall back to hrtimer
* based cpu-clock-tick sw counter, which
* is always available even if no PMU support:
*/
if (attr->type == PERF_TYPE_HARDWARE
&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
if (verbose)
warning(" ... trying to fall back to cpu-clock-ticks\n");
attr->type = PERF_TYPE_SOFTWARE;
attr->config = PERF_COUNT_SW_CPU_CLOCK;
goto try_again;
}
printf("\n");
error("perfcounter syscall returned with %d (%s)\n",
fd[i][counter], strerror(err));
die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
exit(-1);
}
assert(fd[i][counter] >= 0);
fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
/*
* First counter acts as the group leader:
*/
if (group && group_fd == -1)
group_fd = fd[i][counter];
event_array[nr_poll].fd = fd[i][counter];
event_array[nr_poll].events = POLLIN;
nr_poll++;
mmap_array[i][counter].counter = counter;
mmap_array[i][counter].prev = 0;
mmap_array[i][counter].mask = mmap_pages*page_size - 1;
mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
PROT_READ, MAP_SHARED, fd[i][counter], 0);
if (mmap_array[i][counter].base == MAP_FAILED)
die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}
static int __cmd_top(void)
{
pthread_t thread;
int i, counter;
int ret;
for (i = 0; i < nr_cpus; i++) {
group_fd = -1;
for (counter = 0; counter < nr_counters; counter++)
start_counter(i, counter);
}
/* Wait for a minimal set of events before starting the snapshot */
poll(event_array, nr_poll, 100);
mmap_read();
if (pthread_create(&thread, NULL, display_thread, NULL)) {
printf("Could not create display thread.\n");
exit(-1);
}
if (realtime_prio) {
struct sched_param param;
param.sched_priority = realtime_prio;
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
printf("Could not set realtime priority.\n");
exit(-1);
}
}
while (1) {
int hits = samples;
mmap_read();
if (hits == samples)
ret = poll(event_array, nr_poll, 100);
}
return 0;
}
static const char * const top_usage[] = {
"perf top [<options>]",
NULL
};
static const struct option options[] = {
OPT_CALLBACK('e', "event", NULL, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_INTEGER('c', "count", &default_interval,
"event period to sample"),
OPT_INTEGER('p', "pid", &target_pid,
"profile events on existing pid"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_INTEGER('C', "CPU", &profile_cpu,
"CPU to profile on"),
OPT_INTEGER('m', "mmap-pages", &mmap_pages,
"number of mmap data pages"),
OPT_INTEGER('r', "realtime", &realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
OPT_INTEGER('d', "delay", &delay_secs,
"number of seconds to delay between refreshes"),
OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
"dump the symbol table used for profiling"),
OPT_INTEGER('f', "count-filter", &count_filter,
"only display functions with more events than this"),
OPT_BOOLEAN('g', "group", &group,
"put the counters into a counter group"),
OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
"only display symbols matchig this pattern"),
OPT_BOOLEAN('z', "zero", &group,
"zero history across updates"),
OPT_INTEGER('F', "freq", &freq,
"profile at this frequency"),
OPT_INTEGER('E', "entries", &print_entries,
"display this many functions"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_END()
};
int cmd_top(int argc, const char **argv, const char *prefix)
{
int counter;
page_size = sysconf(_SC_PAGE_SIZE);
argc = parse_options(argc, argv, options, top_usage, 0);
if (argc)
usage_with_options(top_usage, options);
if (freq) {
default_interval = freq;
freq = 1;
}
/* CPU and PID are mutually exclusive */
if (target_pid != -1 && profile_cpu != -1) {
printf("WARNING: PID switch overriding CPU\n");
sleep(1);
profile_cpu = -1;
}
if (!nr_counters)
nr_counters = 1;
if (delay_secs < 1)
delay_secs = 1;
parse_symbols();
/*
* Fill in the ones not specifically initialized via -c:
*/
for (counter = 0; counter < nr_counters; counter++) {
if (attrs[counter].sample_period)
continue;
attrs[counter].sample_period = default_interval;
}
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
assert(nr_cpus >= 0);
if (target_pid != -1 || profile_cpu != -1)
nr_cpus = 1;
return __cmd_top();
}

26
tools/perf/builtin.h Normal file
View file

@ -0,0 +1,26 @@
#ifndef BUILTIN_H
#define BUILTIN_H
#include "util/util.h"
#include "util/strbuf.h"
extern const char perf_version_string[];
extern const char perf_usage_string[];
extern const char perf_more_info_string[];
extern void list_common_cmds_help(void);
extern const char *help_unknown_cmd(const char *cmd);
extern void prune_packed_objects(int);
extern int read_line_with_nul(char *buf, int size, FILE *file);
extern int check_pager_config(const char *cmd);
extern int cmd_annotate(int argc, const char **argv, const char *prefix);
extern int cmd_help(int argc, const char **argv, const char *prefix);
extern int cmd_record(int argc, const char **argv, const char *prefix);
extern int cmd_report(int argc, const char **argv, const char *prefix);
extern int cmd_stat(int argc, const char **argv, const char *prefix);
extern int cmd_top(int argc, const char **argv, const char *prefix);
extern int cmd_version(int argc, const char **argv, const char *prefix);
extern int cmd_list(int argc, const char **argv, const char *prefix);
#endif

View file

@ -0,0 +1,10 @@
#
# List of known perf commands.
# command name category [deprecated] [common]
#
perf-annotate mainporcelain common
perf-list mainporcelain common
perf-record mainporcelain common
perf-report mainporcelain common
perf-stat mainporcelain common
perf-top mainporcelain common

442
tools/perf/design.txt Normal file
View file

@ -0,0 +1,442 @@
Performance Counters for Linux
------------------------------
Performance counters are special hardware registers available on most modern
CPUs. These registers count the number of certain types of hw events: such
as instructions executed, cachemisses suffered, or branches mis-predicted -
without slowing down the kernel or applications. These registers can also
trigger interrupts when a threshold number of events have passed - and can
thus be used to profile the code that runs on that CPU.
The Linux Performance Counter subsystem provides an abstraction of these
hardware capabilities. It provides per task and per CPU counters, counter
groups, and it provides event capabilities on top of those. It
provides "virtual" 64-bit counters, regardless of the width of the
underlying hardware counters.
Performance counters are accessed via special file descriptors.
There's one file descriptor per virtual counter used.
The special file descriptor is opened via the perf_counter_open()
system call:
int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
pid_t pid, int cpu, int group_fd,
unsigned long flags);
The syscall returns the new fd. The fd can be used via the normal
VFS system calls: read() can be used to read the counter, fcntl()
can be used to set the blocking mode, etc.
Multiple counters can be kept open at a time, and the counters
can be poll()ed.
When creating a new counter fd, 'perf_counter_hw_event' is:
struct perf_counter_hw_event {
/*
* The MSB of the config word signifies if the rest contains cpu
* specific (raw) counter configuration data, if unset, the next
* 7 bits are an event type and the rest of the bits are the event
* identifier.
*/
__u64 config;
__u64 irq_period;
__u32 record_type;
__u32 read_format;
__u64 disabled : 1, /* off by default */
inherit : 1, /* children inherit it */
pinned : 1, /* must always be on PMU */
exclusive : 1, /* only group on PMU */
exclude_user : 1, /* don't count user */
exclude_kernel : 1, /* ditto kernel */
exclude_hv : 1, /* ditto hypervisor */
exclude_idle : 1, /* don't count when idle */
mmap : 1, /* include mmap data */
munmap : 1, /* include munmap data */
comm : 1, /* include comm data */
__reserved_1 : 52;
__u32 extra_config_len;
__u32 wakeup_events; /* wakeup every n events */
__u64 __reserved_2;
__u64 __reserved_3;
};
The 'config' field specifies what the counter should count. It
is divided into 3 bit-fields:
raw_type: 1 bit (most significant bit) 0x8000_0000_0000_0000
type: 7 bits (next most significant) 0x7f00_0000_0000_0000
event_id: 56 bits (least significant) 0x00ff_ffff_ffff_ffff
If 'raw_type' is 1, then the counter will count a hardware event
specified by the remaining 63 bits of event_config. The encoding is
machine-specific.
If 'raw_type' is 0, then the 'type' field says what kind of counter
this is, with the following encoding:
enum perf_event_types {
PERF_TYPE_HARDWARE = 0,
PERF_TYPE_SOFTWARE = 1,
PERF_TYPE_TRACEPOINT = 2,
};
A counter of PERF_TYPE_HARDWARE will count the hardware event
specified by 'event_id':
/*
* Generalized performance counter event types, used by the hw_event.event_id
* parameter of the sys_perf_counter_open() syscall:
*/
enum hw_event_ids {
/*
* Common hardware events, generalized by the kernel:
*/
PERF_COUNT_HW_CPU_CYCLES = 0,
PERF_COUNT_HW_INSTRUCTIONS = 1,
PERF_COUNT_HW_CACHE_REFERENCES = 2,
PERF_COUNT_HW_CACHE_MISSES = 3,
PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
PERF_COUNT_HW_BRANCH_MISSES = 5,
PERF_COUNT_HW_BUS_CYCLES = 6,
};
These are standardized types of events that work relatively uniformly
on all CPUs that implement Performance Counters support under Linux,
although there may be variations (e.g., different CPUs might count
cache references and misses at different levels of the cache hierarchy).
If a CPU is not able to count the selected event, then the system call
will return -EINVAL.
More hw_event_types are supported as well, but they are CPU-specific
and accessed as raw events. For example, to count "External bus
cycles while bus lock signal asserted" events on Intel Core CPUs, pass
in a 0x4064 event_id value and set hw_event.raw_type to 1.
A counter of type PERF_TYPE_SOFTWARE will count one of the available
software events, selected by 'event_id':
/*
* Special "software" counters provided by the kernel, even if the hardware
* does not support performance counters. These counters measure various
* physical and sw events of the kernel (and allow the profiling of them as
* well):
*/
enum sw_event_ids {
PERF_COUNT_SW_CPU_CLOCK = 0,
PERF_COUNT_SW_TASK_CLOCK = 1,
PERF_COUNT_SW_PAGE_FAULTS = 2,
PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
};
Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
tracer is available, and event_id values can be obtained from
/debug/tracing/events/*/*/id
Counters come in two flavours: counting counters and sampling
counters. A "counting" counter is one that is used for counting the
number of events that occur, and is characterised by having
irq_period = 0.
A read() on a counter returns the current value of the counter and possible
additional values as specified by 'read_format', each value is a u64 (8 bytes)
in size.
/*
* Bits that can be set in hw_event.read_format to request that
* reads on the counter should return the indicated quantities,
* in increasing order of bit value, after the counter value.
*/
enum perf_counter_read_format {
PERF_FORMAT_TOTAL_TIME_ENABLED = 1,
PERF_FORMAT_TOTAL_TIME_RUNNING = 2,
};
Using these additional values one can establish the overcommit ratio for a
particular counter allowing one to take the round-robin scheduling effect
into account.
A "sampling" counter is one that is set up to generate an interrupt
every N events, where N is given by 'irq_period'. A sampling counter
has irq_period > 0. The record_type controls what data is recorded on each
interrupt:
/*
* Bits that can be set in hw_event.record_type to request information
* in the overflow packets.
*/
enum perf_counter_record_format {
PERF_RECORD_IP = 1U << 0,
PERF_RECORD_TID = 1U << 1,
PERF_RECORD_TIME = 1U << 2,
PERF_RECORD_ADDR = 1U << 3,
PERF_RECORD_GROUP = 1U << 4,
PERF_RECORD_CALLCHAIN = 1U << 5,
};
Such (and other) events will be recorded in a ring-buffer, which is
available to user-space using mmap() (see below).
The 'disabled' bit specifies whether the counter starts out disabled
or enabled. If it is initially disabled, it can be enabled by ioctl
or prctl (see below).
The 'inherit' bit, if set, specifies that this counter should count
events on descendant tasks as well as the task specified. This only
applies to new descendents, not to any existing descendents at the
time the counter is created (nor to any new descendents of existing
descendents).
The 'pinned' bit, if set, specifies that the counter should always be
on the CPU if at all possible. It only applies to hardware counters
and only to group leaders. If a pinned counter cannot be put onto the
CPU (e.g. because there are not enough hardware counters or because of
a conflict with some other event), then the counter goes into an
'error' state, where reads return end-of-file (i.e. read() returns 0)
until the counter is subsequently enabled or disabled.
The 'exclusive' bit, if set, specifies that when this counter's group
is on the CPU, it should be the only group using the CPU's counters.
In future, this will allow sophisticated monitoring programs to supply
extra configuration information via 'extra_config_len' to exploit
advanced features of the CPU's Performance Monitor Unit (PMU) that are
not otherwise accessible and that might disrupt other hardware
counters.
The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a
way to request that counting of events be restricted to times when the
CPU is in user, kernel and/or hypervisor mode.
The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap
operations, these can be used to relate userspace IP addresses to actual
code, even after the mapping (or even the whole process) is gone,
these events are recorded in the ring-buffer (see below).
The 'comm' bit allows tracking of process comm data on process creation.
This too is recorded in the ring-buffer (see below).
The 'pid' parameter to the perf_counter_open() system call allows the
counter to be specific to a task:
pid == 0: if the pid parameter is zero, the counter is attached to the
current task.
pid > 0: the counter is attached to a specific task (if the current task
has sufficient privilege to do so)
pid < 0: all tasks are counted (per cpu counters)
The 'cpu' parameter allows a counter to be made specific to a CPU:
cpu >= 0: the counter is restricted to a specific CPU
cpu == -1: the counter counts on all CPUs
(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
events of that task and 'follows' that task to whatever CPU the task
gets schedule to. Per task counters can be created by any user, for
their own tasks.
A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
The 'flags' parameter is currently unused and must be zero.
The 'group_fd' parameter allows counter "groups" to be set up. A
counter group has one counter which is the group "leader". The leader
is created first, with group_fd = -1 in the perf_counter_open call
that creates it. The rest of the group members are created
subsequently, with group_fd giving the fd of the group leader.
(A single counter on its own is created with group_fd = -1 and is
considered to be a group with only 1 member.)
A counter group is scheduled onto the CPU as a unit, that is, it will
only be put onto the CPU if all of the counters in the group can be
put onto the CPU. This means that the values of the member counters
can be meaningfully compared, added, divided (to get ratios), etc.,
with each other, since they have counted events for the same set of
executed instructions.
Like stated, asynchronous events, like counter overflow or PROT_EXEC mmap
tracking are logged into a ring-buffer. This ring-buffer is created and
accessed through mmap().
The mmap size should be 1+2^n pages, where the first page is a meta-data page
(struct perf_counter_mmap_page) that contains various bits of information such
as where the ring-buffer head is.
/*
* Structure of the page that can be mapped via mmap
*/
struct perf_counter_mmap_page {
__u32 version; /* version number of this structure */
__u32 compat_version; /* lowest version this is compat with */
/*
* Bits needed to read the hw counters in user-space.
*
* u32 seq;
* s64 count;
*
* do {
* seq = pc->lock;
*
* barrier()
* if (pc->index) {
* count = pmc_read(pc->index - 1);
* count += pc->offset;
* } else
* goto regular_read;
*
* barrier();
* } while (pc->lock != seq);
*
* NOTE: for obvious reason this only works on self-monitoring
* processes.
*/
__u32 lock; /* seqlock for synchronization */
__u32 index; /* hardware counter identifier */
__s64 offset; /* add to hardware counter value */
/*
* Control data for the mmap() data buffer.
*
* User-space reading this value should issue an rmb(), on SMP capable
* platforms, after reading this value -- see perf_counter_wakeup().
*/
__u32 data_head; /* head in the data section */
};
NOTE: the hw-counter userspace bits are arch specific and are currently only
implemented on powerpc.
The following 2^n pages are the ring-buffer which contains events of the form:
#define PERF_EVENT_MISC_KERNEL (1 << 0)
#define PERF_EVENT_MISC_USER (1 << 1)
#define PERF_EVENT_MISC_OVERFLOW (1 << 2)
struct perf_event_header {
__u32 type;
__u16 misc;
__u16 size;
};
enum perf_event_type {
/*
* The MMAP events record the PROT_EXEC mappings so that we can
* correlate userspace IPs to code. They have the following structure:
*
* struct {
* struct perf_event_header header;
*
* u32 pid, tid;
* u64 addr;
* u64 len;
* u64 pgoff;
* char filename[];
* };
*/
PERF_EVENT_MMAP = 1,
PERF_EVENT_MUNMAP = 2,
/*
* struct {
* struct perf_event_header header;
*
* u32 pid, tid;
* char comm[];
* };
*/
PERF_EVENT_COMM = 3,
/*
* When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
* will be PERF_RECORD_*
*
* struct {
* struct perf_event_header header;
*
* { u64 ip; } && PERF_RECORD_IP
* { u32 pid, tid; } && PERF_RECORD_TID
* { u64 time; } && PERF_RECORD_TIME
* { u64 addr; } && PERF_RECORD_ADDR
*
* { u64 nr;
* { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP
*
* { u16 nr,
* hv,
* kernel,
* user;
* u64 ips[nr]; } && PERF_RECORD_CALLCHAIN
* };
*/
};
NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented
on x86.
Notification of new events is possible through poll()/select()/epoll() and
fcntl() managing signals.
Normally a notification is generated for every page filled, however one can
additionally set perf_counter_hw_event.wakeup_events to generate one every
so many counter overflow events.
Future work will include a splice() interface to the ring-buffer.
Counters can be enabled and disabled in two ways: via ioctl and via
prctl. When a counter is disabled, it doesn't count or generate
events but does continue to exist and maintain its count value.
An individual counter or counter group can be enabled with
ioctl(fd, PERF_COUNTER_IOC_ENABLE);
or disabled with
ioctl(fd, PERF_COUNTER_IOC_DISABLE);
Enabling or disabling the leader of a group enables or disables the
whole group; that is, while the group leader is disabled, none of the
counters in the group will count. Enabling or disabling a member of a
group other than the leader only affects that counter - disabling an
non-leader stops that counter from counting but doesn't affect any
other counter.
Additionally, non-inherited overflow counters can use
ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr);
to enable a counter for 'nr' events, after which it gets disabled again.
A process can enable or disable all the counter groups that are
attached to it, using prctl:
prctl(PR_TASK_PERF_COUNTERS_ENABLE);
prctl(PR_TASK_PERF_COUNTERS_DISABLE);
This applies to all counters on the current process, whether created
by this process or by another, and doesn't affect any counters that
this process has created on other processes. It only enables or
disables the group leaders, not any other members in the groups.

428
tools/perf/perf.c Normal file
View file

@ -0,0 +1,428 @@
/*
* perf.c
*
* Performance analysis utility.
*
* This is the main hub from which the sub-commands (perf stat,
* perf top, perf record, perf report, etc.) are started.
*/
#include "builtin.h"
#include "util/exec_cmd.h"
#include "util/cache.h"
#include "util/quote.h"
#include "util/run-command.h"
const char perf_usage_string[] =
"perf [--version] [--help] COMMAND [ARGS]";
const char perf_more_info_string[] =
"See 'perf help COMMAND' for more information on a specific command.";
static int use_pager = -1;
struct pager_config {
const char *cmd;
int val;
};
static int pager_command_config(const char *var, const char *value, void *data)
{
struct pager_config *c = data;
if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd))
c->val = perf_config_bool(var, value);
return 0;
}
/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */
int check_pager_config(const char *cmd)
{
struct pager_config c;
c.cmd = cmd;
c.val = -1;
perf_config(pager_command_config, &c);
return c.val;
}
static void commit_pager_choice(void) {
switch (use_pager) {
case 0:
setenv("PERF_PAGER", "cat", 1);
break;
case 1:
/* setup_pager(); */
break;
default:
break;
}
}
static int handle_options(const char*** argv, int* argc, int* envchanged)
{
int handled = 0;
while (*argc > 0) {
const char *cmd = (*argv)[0];
if (cmd[0] != '-')
break;
/*
* For legacy reasons, the "version" and "help"
* commands can be written with "--" prepended
* to make them look like flags.
*/
if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version"))
break;
/*
* Check remaining flags.
*/
if (!prefixcmp(cmd, "--exec-path")) {
cmd += 11;
if (*cmd == '=')
perf_set_argv_exec_path(cmd + 1);
else {
puts(perf_exec_path());
exit(0);
}
} else if (!strcmp(cmd, "--html-path")) {
puts(system_path(PERF_HTML_PATH));
exit(0);
} else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) {
use_pager = 1;
} else if (!strcmp(cmd, "--no-pager")) {
use_pager = 0;
if (envchanged)
*envchanged = 1;
} else if (!strcmp(cmd, "--perf-dir")) {
if (*argc < 2) {
fprintf(stderr, "No directory given for --perf-dir.\n" );
usage(perf_usage_string);
}
setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1);
if (envchanged)
*envchanged = 1;
(*argv)++;
(*argc)--;
handled++;
} else if (!prefixcmp(cmd, "--perf-dir=")) {
setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1);
if (envchanged)
*envchanged = 1;
} else if (!strcmp(cmd, "--work-tree")) {
if (*argc < 2) {
fprintf(stderr, "No directory given for --work-tree.\n" );
usage(perf_usage_string);
}
setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1);
if (envchanged)
*envchanged = 1;
(*argv)++;
(*argc)--;
} else if (!prefixcmp(cmd, "--work-tree=")) {
setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1);
if (envchanged)
*envchanged = 1;
} else {
fprintf(stderr, "Unknown option: %s\n", cmd);
usage(perf_usage_string);
}
(*argv)++;
(*argc)--;
handled++;
}
return handled;
}
static int handle_alias(int *argcp, const char ***argv)
{
int envchanged = 0, ret = 0, saved_errno = errno;
int count, option_count;
const char** new_argv;
const char *alias_command;
char *alias_string;
alias_command = (*argv)[0];
alias_string = alias_lookup(alias_command);
if (alias_string) {
if (alias_string[0] == '!') {
if (*argcp > 1) {
struct strbuf buf;
strbuf_init(&buf, PATH_MAX);
strbuf_addstr(&buf, alias_string);
sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
free(alias_string);
alias_string = buf.buf;
}
ret = system(alias_string + 1);
if (ret >= 0 && WIFEXITED(ret) &&
WEXITSTATUS(ret) != 127)
exit(WEXITSTATUS(ret));
die("Failed to run '%s' when expanding alias '%s'",
alias_string + 1, alias_command);
}
count = split_cmdline(alias_string, &new_argv);
if (count < 0)
die("Bad alias.%s string", alias_command);
option_count = handle_options(&new_argv, &count, &envchanged);
if (envchanged)
die("alias '%s' changes environment variables\n"
"You can use '!perf' in the alias to do this.",
alias_command);
memmove(new_argv - option_count, new_argv,
count * sizeof(char *));
new_argv -= option_count;
if (count < 1)
die("empty alias for %s", alias_command);
if (!strcmp(alias_command, new_argv[0]))
die("recursive alias: %s", alias_command);
new_argv = realloc(new_argv, sizeof(char*) *
(count + *argcp + 1));
/* insert after command name */
memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp);
new_argv[count+*argcp] = NULL;
*argv = new_argv;
*argcp += count - 1;
ret = 1;
}
errno = saved_errno;
return ret;
}
const char perf_version_string[] = PERF_VERSION;
#define RUN_SETUP (1<<0)
#define USE_PAGER (1<<1)
/*
* require working tree to be present -- anything uses this needs
* RUN_SETUP for reading from the configuration file.
*/
#define NEED_WORK_TREE (1<<2)
struct cmd_struct {
const char *cmd;
int (*fn)(int, const char **, const char *);
int option;
};
static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
{
int status;
struct stat st;
const char *prefix;
prefix = NULL;
if (p->option & RUN_SETUP)
prefix = NULL; /* setup_perf_directory(); */
if (use_pager == -1 && p->option & RUN_SETUP)
use_pager = check_pager_config(p->cmd);
if (use_pager == -1 && p->option & USE_PAGER)
use_pager = 1;
commit_pager_choice();
if (p->option & NEED_WORK_TREE)
/* setup_work_tree() */;
status = p->fn(argc, argv, prefix);
if (status)
return status & 0xff;
/* Somebody closed stdout? */
if (fstat(fileno(stdout), &st))
return 0;
/* Ignore write errors for pipes and sockets.. */
if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
return 0;
/* Check for ENOSPC and EIO errors.. */
if (fflush(stdout))
die("write failure on standard output: %s", strerror(errno));
if (ferror(stdout))
die("unknown write failure on standard output");
if (fclose(stdout))
die("close failed on standard output: %s", strerror(errno));
return 0;
}
static void handle_internal_command(int argc, const char **argv)
{
const char *cmd = argv[0];
static struct cmd_struct commands[] = {
{ "help", cmd_help, 0 },
{ "list", cmd_list, 0 },
{ "record", cmd_record, 0 },
{ "report", cmd_report, 0 },
{ "stat", cmd_stat, 0 },
{ "top", cmd_top, 0 },
{ "annotate", cmd_annotate, 0 },
{ "version", cmd_version, 0 },
};
int i;
static const char ext[] = STRIP_EXTENSION;
if (sizeof(ext) > 1) {
i = strlen(argv[0]) - strlen(ext);
if (i > 0 && !strcmp(argv[0] + i, ext)) {
char *argv0 = strdup(argv[0]);
argv[0] = cmd = argv0;
argv0[i] = '\0';
}
}
/* Turn "perf cmd --help" into "perf help cmd" */
if (argc > 1 && !strcmp(argv[1], "--help")) {
argv[1] = argv[0];
argv[0] = cmd = "help";
}
for (i = 0; i < ARRAY_SIZE(commands); i++) {
struct cmd_struct *p = commands+i;
if (strcmp(p->cmd, cmd))
continue;
exit(run_builtin(p, argc, argv));
}
}
static void execv_dashed_external(const char **argv)
{
struct strbuf cmd = STRBUF_INIT;
const char *tmp;
int status;
strbuf_addf(&cmd, "perf-%s", argv[0]);
/*
* argv[0] must be the perf command, but the argv array
* belongs to the caller, and may be reused in
* subsequent loop iterations. Save argv[0] and
* restore it on error.
*/
tmp = argv[0];
argv[0] = cmd.buf;
/*
* if we fail because the command is not found, it is
* OK to return. Otherwise, we just pass along the status code.
*/
status = run_command_v_opt(argv, 0);
if (status != -ERR_RUN_COMMAND_EXEC) {
if (IS_RUN_COMMAND_ERR(status))
die("unable to run '%s'", argv[0]);
exit(-status);
}
errno = ENOENT; /* as if we called execvp */
argv[0] = tmp;
strbuf_release(&cmd);
}
static int run_argv(int *argcp, const char ***argv)
{
int done_alias = 0;
while (1) {
/* See if it's an internal command */
handle_internal_command(*argcp, *argv);
/* .. then try the external ones */
execv_dashed_external(*argv);
/* It could be an alias -- this works around the insanity
* of overriding "perf log" with "perf show" by having
* alias.log = show
*/
if (done_alias || !handle_alias(argcp, argv))
break;
done_alias = 1;
}
return done_alias;
}
int main(int argc, const char **argv)
{
const char *cmd;
cmd = perf_extract_argv0_path(argv[0]);
if (!cmd)
cmd = "perf-help";
/*
* "perf-xxxx" is the same as "perf xxxx", but we obviously:
*
* - cannot take flags in between the "perf" and the "xxxx".
* - cannot execute it externally (since it would just do
* the same thing over again)
*
* So we just directly call the internal command handler, and
* die if that one cannot handle it.
*/
if (!prefixcmp(cmd, "perf-")) {
cmd += 5;
argv[0] = cmd;
handle_internal_command(argc, argv);
die("cannot handle %s internally", cmd);
}
/* Look for flags.. */
argv++;
argc--;
handle_options(&argv, &argc, NULL);
commit_pager_choice();
if (argc > 0) {
if (!prefixcmp(argv[0], "--"))
argv[0] += 2;
} else {
/* The user didn't specify a command; give them help */
printf("\n usage: %s\n\n", perf_usage_string);
list_common_cmds_help();
printf("\n %s\n\n", perf_more_info_string);
exit(1);
}
cmd = argv[0];
/*
* We use PATH to find perf commands, but we prepend some higher
* precidence paths: the "--exec-path" option, the PERF_EXEC_PATH
* environment, and the $(perfexecdir) from the Makefile at build
* time.
*/
setup_path();
while (1) {
static int done_help = 0;
static int was_alias = 0;
was_alias = run_argv(&argc, &argv);
if (errno != ENOENT)
break;
if (was_alias) {
fprintf(stderr, "Expansion of alias '%s' failed; "
"'%s' is not a perf-command\n",
cmd, argv[0]);
exit(1);
}
if (!done_help) {
cmd = argv[0] = help_unknown_cmd(cmd);
done_help = 1;
} else
break;
}
fprintf(stderr, "Failed to run command '%s': %s\n",
cmd, strerror(errno));
return 1;
}

67
tools/perf/perf.h Normal file
View file

@ -0,0 +1,67 @@
#ifndef _PERF_PERF_H
#define _PERF_PERF_H
#if defined(__x86_64__) || defined(__i386__)
#include "../../arch/x86/include/asm/unistd.h"
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#endif
#ifdef __powerpc__
#include "../../arch/powerpc/include/asm/unistd.h"
#define rmb() asm volatile ("sync" ::: "memory")
#define cpu_relax() asm volatile ("" ::: "memory");
#endif
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include "../../include/linux/perf_counter.h"
/*
* prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
* counters in the current task.
*/
#define PR_TASK_PERF_COUNTERS_DISABLE 31
#define PR_TASK_PERF_COUNTERS_ENABLE 32
#ifndef NSEC_PER_SEC
# define NSEC_PER_SEC 1000000000ULL
#endif
static inline unsigned long long rdclock(void)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
/*
* Pick up some kernel type conventions:
*/
#define __user
#define asmlinkage
#define unlikely(x) __builtin_expect(!!(x), 0)
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
static inline int
sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
pid_t pid, int cpu, int group_fd,
unsigned long flags)
{
return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
group_fd, flags);
}
#define MAX_COUNTERS 256
#define MAX_NR_CPUS 256
#endif

View file

@ -0,0 +1,42 @@
#!/bin/sh
GVF=PERF-VERSION-FILE
DEF_VER=v0.0.1.PERF
LF='
'
# First see if there is a version file (included in release tarballs),
# then try git-describe, then default.
if test -f version
then
VN=$(cat version) || VN="$DEF_VER"
elif test -d .git -o -f .git &&
VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
case "$VN" in
*$LF*) (exit 1) ;;
v[0-9]*)
git update-index -q --refresh
test -z "$(git diff-index --name-only HEAD --)" ||
VN="$VN-dirty" ;;
esac
then
VN=$(echo "$VN" | sed -e 's/-/./g');
else
VN="$DEF_VER"
fi
VN=$(expr "$VN" : v*'\(.*\)')
if test -r $GVF
then
VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
else
VC=unset
fi
test "$VN" = "$VC" || {
echo >&2 "PERF_VERSION = $VN"
echo "PERF_VERSION = $VN" >$GVF
}

117
tools/perf/util/abspath.c Normal file
View file

@ -0,0 +1,117 @@
#include "cache.h"
/*
* Do not use this for inspecting *tracked* content. When path is a
* symlink to a directory, we do not want to say it is a directory when
* dealing with tracked content in the working tree.
*/
static int is_directory(const char *path)
{
struct stat st;
return (!stat(path, &st) && S_ISDIR(st.st_mode));
}
/* We allow "recursive" symbolic links. Only within reason, though. */
#define MAXDEPTH 5
const char *make_absolute_path(const char *path)
{
static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1];
char cwd[1024] = "";
int buf_index = 1, len;
int depth = MAXDEPTH;
char *last_elem = NULL;
struct stat st;
if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
die ("Too long path: %.*s", 60, path);
while (depth--) {
if (!is_directory(buf)) {
char *last_slash = strrchr(buf, '/');
if (last_slash) {
*last_slash = '\0';
last_elem = xstrdup(last_slash + 1);
} else {
last_elem = xstrdup(buf);
*buf = '\0';
}
}
if (*buf) {
if (!*cwd && !getcwd(cwd, sizeof(cwd)))
die ("Could not get current working directory");
if (chdir(buf))
die ("Could not switch to '%s'", buf);
}
if (!getcwd(buf, PATH_MAX))
die ("Could not get current working directory");
if (last_elem) {
int len = strlen(buf);
if (len + strlen(last_elem) + 2 > PATH_MAX)
die ("Too long path name: '%s/%s'",
buf, last_elem);
buf[len] = '/';
strcpy(buf + len + 1, last_elem);
free(last_elem);
last_elem = NULL;
}
if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) {
len = readlink(buf, next_buf, PATH_MAX);
if (len < 0)
die ("Invalid symlink: %s", buf);
if (PATH_MAX <= len)
die("symbolic link too long: %s", buf);
next_buf[len] = '\0';
buf = next_buf;
buf_index = 1 - buf_index;
next_buf = bufs[buf_index];
} else
break;
}
if (*cwd && chdir(cwd))
die ("Could not change back to '%s'", cwd);
return buf;
}
static const char *get_pwd_cwd(void)
{
static char cwd[PATH_MAX + 1];
char *pwd;
struct stat cwd_stat, pwd_stat;
if (getcwd(cwd, PATH_MAX) == NULL)
return NULL;
pwd = getenv("PWD");
if (pwd && strcmp(pwd, cwd)) {
stat(cwd, &cwd_stat);
if (!stat(pwd, &pwd_stat) &&
pwd_stat.st_dev == cwd_stat.st_dev &&
pwd_stat.st_ino == cwd_stat.st_ino) {
strlcpy(cwd, pwd, PATH_MAX);
}
}
return cwd;
}
const char *make_nonrelative_path(const char *path)
{
static char buf[PATH_MAX + 1];
if (is_absolute_path(path)) {
if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
die("Too long path: %.*s", 60, path);
} else {
const char *cwd = get_pwd_cwd();
if (!cwd)
die("Cannot determine the current working directory");
if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
die("Too long path: %.*s", 60, path);
}
return buf;
}

Some files were not shown because too many files have changed in this diff Show more