aha/arch/powerpc/platforms/cell/cbe_regs.c

281 lines
6.5 KiB
C
Raw Normal View History

/*
* cbe_regs.c
*
* Accessor routines for the various MMIO register blocks of the CBE
*
* (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
*/
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
#include <asm/ptrace.h>
#include <asm/cell-regs.h>
/*
* Current implementation uses "cpu" nodes. We build our own mapping
* array of cpu numbers to cpu nodes locally for now to allow interrupt
* time code to have a fast path rather than call of_get_cpu_node(). If
* we implement cpu hotplug, we'll have to install an appropriate norifier
* in order to release references to the cpu going away
*/
static struct cbe_regs_map
{
struct device_node *cpu_node;
struct device_node *be_node;
struct cbe_pmd_regs __iomem *pmd_regs;
struct cbe_iic_regs __iomem *iic_regs;
struct cbe_mic_tm_regs __iomem *mic_tm_regs;
struct cbe_pmd_shadow_regs pmd_shadow_regs;
} cbe_regs_maps[MAX_CBE];
static int cbe_regs_map_count;
static struct cbe_thread_map
{
struct device_node *cpu_node;
struct device_node *be_node;
struct cbe_regs_map *regs;
unsigned int thread_id;
unsigned int cbe_id;
} cbe_thread_map[NR_CPUS];
static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = CPU_MASK_NONE };
static cpumask_t cbe_first_online_cpu = CPU_MASK_NONE;
static struct cbe_regs_map *cbe_find_map(struct device_node *np)
{
int i;
struct device_node *tmp_np;
if (strcasecmp(np->type, "spe")) {
for (i = 0; i < cbe_regs_map_count; i++)
if (cbe_regs_maps[i].cpu_node == np ||
cbe_regs_maps[i].be_node == np)
return &cbe_regs_maps[i];
return NULL;
}
if (np->data)
return np->data;
/* walk up path until cpu or be node was found */
tmp_np = np;
do {
tmp_np = tmp_np->parent;
/* on a correct devicetree we wont get up to root */
BUG_ON(!tmp_np);
} while (strcasecmp(tmp_np->type, "cpu") &&
strcasecmp(tmp_np->type, "be"));
np->data = cbe_find_map(tmp_np);
return np->data;
}
struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
{
struct cbe_regs_map *map = cbe_find_map(np);
if (map == NULL)
return NULL;
return map->pmd_regs;
}
EXPORT_SYMBOL_GPL(cbe_get_pmd_regs);
struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu)
{
struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
if (map == NULL)
return NULL;
return map->pmd_regs;
}
EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs);
struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np)
{
struct cbe_regs_map *map = cbe_find_map(np);
if (map == NULL)
return NULL;
return &map->pmd_shadow_regs;
}
struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu)
{
struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
if (map == NULL)
return NULL;
return &map->pmd_shadow_regs;
}
struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np)
{
struct cbe_regs_map *map = cbe_find_map(np);
if (map == NULL)
return NULL;
return map->iic_regs;
}
struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu)
{
struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
if (map == NULL)
return NULL;
return map->iic_regs;
}
struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np)
{
struct cbe_regs_map *map = cbe_find_map(np);
if (map == NULL)
return NULL;
return map->mic_tm_regs;
}
struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
{
struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
if (map == NULL)
return NULL;
return map->mic_tm_regs;
}
EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
[POWERPC] cell: Add oprofile support Add PPU event-based and cycle-based profiling support to Oprofile for Cell. Oprofile is expected to collect data on all CPUs simultaneously. However, there is one set of performance counters per node. There are two hardware threads or virtual CPUs on each node. Hence, OProfile must multiplex in time the performance counter collection on the two virtual CPUs. The multiplexing of the performance counters is done by a virtual counter routine. Initially, the counters are configured to collect data on the even CPUs in the system, one CPU per node. In order to capture the PC for the virtual CPU when the performance counter interrupt occurs (the specified number of events between samples has occurred), the even processors are configured to handle the performance counter interrupts for their node. The virtual counter routine is called via a kernel timer after the virtual sample time. The routine stops the counters, saves the current counts, loads the last counts for the other virtual CPU on the node, sets interrupts to be handled by the other virtual CPU and restarts the counters, the virtual timer routine is scheduled to run again. The virtual sample time is kept relatively small to make sure sampling occurs on both CPUs on the node with a relatively small granularity. Whenever the counters overflow, the performance counter interrupt is called to collect the PC for the CPU where data is being collected. The oprofile driver relies on a firmware RTAS call to setup the debug bus to route the desired signals to the performance counter hardware to be counted. The RTAS call must set the routing registers appropriately in each of the islands to pass the signals down the debug bus as well as routing the signals from a particular island onto the bus. There is a second firmware RTAS call to reset the debug bus to the non pass thru state when the counters are not in use. Signed-off-by: Carl Love <carll@us.ibm.com> Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com> Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-11-20 17:45:16 +00:00
u32 cbe_get_hw_thread_id(int cpu)
{
return cbe_thread_map[cpu].thread_id;
[POWERPC] cell: Add oprofile support Add PPU event-based and cycle-based profiling support to Oprofile for Cell. Oprofile is expected to collect data on all CPUs simultaneously. However, there is one set of performance counters per node. There are two hardware threads or virtual CPUs on each node. Hence, OProfile must multiplex in time the performance counter collection on the two virtual CPUs. The multiplexing of the performance counters is done by a virtual counter routine. Initially, the counters are configured to collect data on the even CPUs in the system, one CPU per node. In order to capture the PC for the virtual CPU when the performance counter interrupt occurs (the specified number of events between samples has occurred), the even processors are configured to handle the performance counter interrupts for their node. The virtual counter routine is called via a kernel timer after the virtual sample time. The routine stops the counters, saves the current counts, loads the last counts for the other virtual CPU on the node, sets interrupts to be handled by the other virtual CPU and restarts the counters, the virtual timer routine is scheduled to run again. The virtual sample time is kept relatively small to make sure sampling occurs on both CPUs on the node with a relatively small granularity. Whenever the counters overflow, the performance counter interrupt is called to collect the PC for the CPU where data is being collected. The oprofile driver relies on a firmware RTAS call to setup the debug bus to route the desired signals to the performance counter hardware to be counted. The RTAS call must set the routing registers appropriately in each of the islands to pass the signals down the debug bus as well as routing the signals from a particular island onto the bus. There is a second firmware RTAS call to reset the debug bus to the non pass thru state when the counters are not in use. Signed-off-by: Carl Love <carll@us.ibm.com> Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com> Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-11-20 17:45:16 +00:00
}
EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
u32 cbe_cpu_to_node(int cpu)
{
return cbe_thread_map[cpu].cbe_id;
}
EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
u32 cbe_node_to_cpu(int node)
{
return find_first_bit( (unsigned long *) &cbe_local_mask[node], sizeof(cpumask_t));
}
EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
static struct device_node *cbe_get_be_node(int cpu_id)
{
struct device_node *np;
for_each_node_by_type (np, "be") {
int len,i;
const phandle *cpu_handle;
cpu_handle = of_get_property(np, "cpus", &len);
/*
* the CAB SLOF tree is non compliant, so we just assume
* there is only one node
*/
if (WARN_ON_ONCE(!cpu_handle))
return np;
for (i=0; i<len; i++)
if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
return np;
}
return NULL;
}
void __init cbe_fill_regs_map(struct cbe_regs_map *map)
{
if(map->be_node) {
struct device_node *be, *np;
be = map->be_node;
for_each_node_by_type(np, "pervasive")
if (of_get_parent(np) == be)
map->pmd_regs = of_iomap(np, 0);
for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller")
if (of_get_parent(np) == be)
map->iic_regs = of_iomap(np, 2);
for_each_node_by_type(np, "mic-tm")
if (of_get_parent(np) == be)
map->mic_tm_regs = of_iomap(np, 0);
} else {
struct device_node *cpu;
/* That hack must die die die ! */
const struct address_prop {
unsigned long address;
unsigned int len;
} __attribute__((packed)) *prop;
cpu = map->cpu_node;
prop = of_get_property(cpu, "pervasive", NULL);
if (prop != NULL)
map->pmd_regs = ioremap(prop->address, prop->len);
prop = of_get_property(cpu, "iic", NULL);
if (prop != NULL)
map->iic_regs = ioremap(prop->address, prop->len);
prop = of_get_property(cpu, "mic-tm", NULL);
if (prop != NULL)
map->mic_tm_regs = ioremap(prop->address, prop->len);
}
}
void __init cbe_regs_init(void)
{
int i;
unsigned int thread_id;
struct device_node *cpu;
/* Build local fast map of CPUs */
for_each_possible_cpu(i) {
cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id);
cbe_thread_map[i].be_node = cbe_get_be_node(i);
cbe_thread_map[i].thread_id = thread_id;
}
/* Find maps for each device tree CPU */
for_each_node_by_type(cpu, "cpu") {
struct cbe_regs_map *map;
unsigned int cbe_id;
cbe_id = cbe_regs_map_count++;
map = &cbe_regs_maps[cbe_id];
if (cbe_regs_map_count > MAX_CBE) {
printk(KERN_ERR "cbe_regs: More BE chips than supported"
"!\n");
cbe_regs_map_count--;
of_node_put(cpu);
return;
}
map->cpu_node = cpu;
for_each_possible_cpu(i) {
struct cbe_thread_map *thread = &cbe_thread_map[i];
if (thread->cpu_node == cpu) {
thread->regs = map;
thread->cbe_id = cbe_id;
map->be_node = thread->be_node;
cpu_set(i, cbe_local_mask[cbe_id]);
if(thread->thread_id == 0)
cpu_set(i, cbe_first_online_cpu);
}
}
cbe_fill_regs_map(map);
}
}