[SPARC64]: Access TSB with physical addresses when possible.

This way we don't need to lock the TSB into the TLB.
The trick is that every TSB load/store is registered into
a special instruction patch section.  The default uses
virtual addresses, and the patch instructions use physical
address load/stores.

We can't do this on all chips because only cheetah+ and later
have the physical variant of the atomic quad load.

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2006-02-01 15:55:21 -08:00
parent b0fd4e49ae
commit 517af33237
9 changed files with 233 additions and 52 deletions

View file

@ -4,7 +4,7 @@
srlx %g6, 48, %g5 ! Get context
brz,pn %g5, kvmap_dtlb ! Context 0 processing
nop ! Delay slot (fill me)
ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry
TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
nop ! Push branch to next I$ line
cmp %g4, %g6 ! Compare TAG

View file

@ -4,7 +4,7 @@
srlx %g6, 48, %g5 ! Get context
brz,pn %g5, kvmap_itlb ! Context 0 processing
nop ! Delay slot (fill me)
ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry
TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
cmp %g4, %g6 ! Compare TAG
sethi %hi(_PAGE_EXEC), %g4 ! Setup exec check

View file

@ -44,14 +44,14 @@ kvmap_itlb_tsb_miss:
kvmap_itlb_vmalloc_addr:
KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
TSB_LOCK_TAG(%g1, %g2, %g4)
KTSB_LOCK_TAG(%g1, %g2, %g4)
/* Load and check PTE. */
ldxa [%g5] ASI_PHYS_USE_EC, %g5
brgez,a,pn %g5, kvmap_itlb_longpath
stx %g0, [%g1]
KTSB_STORE(%g1, %g0)
TSB_WRITE(%g1, %g5, %g6)
KTSB_WRITE(%g1, %g5, %g6)
/* fallthrough to TLB load */
@ -69,9 +69,9 @@ kvmap_itlb_longpath:
kvmap_itlb_obp:
OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
TSB_LOCK_TAG(%g1, %g2, %g4)
KTSB_LOCK_TAG(%g1, %g2, %g4)
TSB_WRITE(%g1, %g5, %g6)
KTSB_WRITE(%g1, %g5, %g6)
ba,pt %xcc, kvmap_itlb_load
nop
@ -79,9 +79,9 @@ kvmap_itlb_obp:
kvmap_dtlb_obp:
OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
TSB_LOCK_TAG(%g1, %g2, %g4)
KTSB_LOCK_TAG(%g1, %g2, %g4)
TSB_WRITE(%g1, %g5, %g6)
KTSB_WRITE(%g1, %g5, %g6)
ba,pt %xcc, kvmap_dtlb_load
nop
@ -114,14 +114,14 @@ kvmap_linear_patch:
kvmap_dtlb_vmalloc_addr:
KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
TSB_LOCK_TAG(%g1, %g2, %g4)
KTSB_LOCK_TAG(%g1, %g2, %g4)
/* Load and check PTE. */
ldxa [%g5] ASI_PHYS_USE_EC, %g5
brgez,a,pn %g5, kvmap_dtlb_longpath
stx %g0, [%g1]
KTSB_STORE(%g1, %g0)
TSB_WRITE(%g1, %g5, %g6)
KTSB_WRITE(%g1, %g5, %g6)
/* fallthrough to TLB load */

View file

@ -53,7 +53,7 @@ tsb_reload:
/* Load and check PTE. */
ldxa [%g5] ASI_PHYS_USE_EC, %g5
brgez,a,pn %g5, tsb_do_fault
stx %g0, [%g1]
TSB_STORE(%g1, %g0)
/* If it is larger than the base page size, don't
* bother putting it into the TSB.
@ -64,7 +64,7 @@ tsb_reload:
and %g2, %g4, %g2
cmp %g2, %g7
bne,a,pn %xcc, tsb_tlb_reload
stx %g0, [%g1]
TSB_STORE(%g1, %g0)
TSB_WRITE(%g1, %g5, %g6)
@ -131,13 +131,13 @@ winfix_trampoline:
/* Insert an entry into the TSB.
*
* %o0: TSB entry pointer
* %o0: TSB entry pointer (virt or phys address)
* %o1: tag
* %o2: pte
*/
.align 32
.globl tsb_insert
tsb_insert:
.globl __tsb_insert
__tsb_insert:
rdpr %pstate, %o5
wrpr %o5, PSTATE_IE, %pstate
TSB_LOCK_TAG(%o0, %g2, %g3)
@ -146,6 +146,31 @@ tsb_insert:
retl
nop
/* Flush the given TSB entry if it has the matching
* tag.
*
* %o0: TSB entry pointer (virt or phys address)
* %o1: tag
*/
.align 32
.globl tsb_flush
tsb_flush:
sethi %hi(TSB_TAG_LOCK_HIGH), %g2
1: TSB_LOAD_TAG(%o0, %g1)
srlx %g1, 32, %o3
andcc %o3, %g2, %g0
bne,pn %icc, 1b
membar #LoadLoad
cmp %g1, %o1
bne,pt %xcc, 2f
clr %o3
TSB_CAS_TAG(%o0, %g1, %o3)
cmp %g1, %o3
bne,pn %xcc, 1b
nop
2: retl
TSB_MEMBAR
/* Reload MMU related context switch state at
* schedule() time.
*

View file

@ -70,6 +70,10 @@ SECTIONS
.con_initcall.init : { *(.con_initcall.init) }
__con_initcall_end = .;
SECURITY_INIT
. = ALIGN(4);
__tsb_phys_patch = .;
.tsb_phys_patch : { *(.tsb_phys_patch) }
__tsb_phys_patch_end = .;
. = ALIGN(8192);
__initramfs_start = .;
.init.ramfs : { *(.init.ramfs) }

View file

@ -39,6 +39,7 @@
#include <asm/tlb.h>
#include <asm/spitfire.h>
#include <asm/sections.h>
#include <asm/tsb.h>
extern void device_scan(void);
@ -244,6 +245,16 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
: "g1", "g7");
}
static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
{
unsigned long tsb_addr = (unsigned long) ent;
if (tlb_type == cheetah_plus)
tsb_addr = __pa(tsb_addr);
__tsb_insert(tsb_addr, tag, pte);
}
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
struct mm_struct *mm;
@ -1040,6 +1051,24 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
return ~0UL;
}
static void __init tsb_phys_patch(void)
{
struct tsb_phys_patch_entry *p;
p = &__tsb_phys_patch;
while (p < &__tsb_phys_patch_end) {
unsigned long addr = p->addr;
*(unsigned int *) addr = p->insn;
wmb();
__asm__ __volatile__("flush %0"
: /* no outputs */
: "r" (addr));
p++;
}
}
/* paging_init() sets up the page tables */
extern void cheetah_ecache_flush_init(void);
@ -1052,6 +1081,9 @@ void __init paging_init(void)
unsigned long end_pfn, pages_avail, shift;
unsigned long real_end, i;
if (tlb_type == cheetah_plus)
tsb_phys_patch();
/* Find available physical memory... */
read_obp_memory("available", &pavail[0], &pavail_ents);

View file

@ -20,12 +20,9 @@ static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries
return vaddr & (nentries - 1);
}
static inline int tag_compare(struct tsb *entry, unsigned long vaddr, unsigned long context)
static inline int tag_compare(unsigned long tag, unsigned long vaddr, unsigned long context)
{
if (context == ~0UL)
return 1;
return (entry->tag == ((vaddr >> 22) | (context << 48)));
return (tag == ((vaddr >> 22) | (context << 48)));
}
/* TSB flushes need only occur on the processor initiating the address
@ -41,7 +38,7 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES);
struct tsb *ent = &swapper_tsb[hash];
if (tag_compare(ent, v, 0)) {
if (tag_compare(ent->tag, v, 0)) {
ent->tag = 0UL;
membar_storeload_storestore();
}
@ -52,24 +49,31 @@ void flush_tsb_user(struct mmu_gather *mp)
{
struct mm_struct *mm = mp->mm;
struct tsb *tsb = mm->context.tsb;
unsigned long ctx = ~0UL;
unsigned long nentries = mm->context.tsb_nentries;
unsigned long ctx, base;
int i;
if (CTX_VALID(mm->context))
ctx = CTX_HWBITS(mm->context);
if (unlikely(!CTX_VALID(mm->context)))
return;
ctx = CTX_HWBITS(mm->context);
if (tlb_type == cheetah_plus)
base = __pa(tsb);
else
base = (unsigned long) tsb;
for (i = 0; i < mp->tlb_nr; i++) {
unsigned long v = mp->vaddrs[i];
struct tsb *ent;
unsigned long tag, ent, hash;
v &= ~0x1UL;
ent = &tsb[tsb_hash(v, nentries)];
if (tag_compare(ent, v, ctx)) {
ent->tag = 0UL;
membar_storeload_storestore();
}
hash = tsb_hash(v, nentries);
ent = base + (hash * sizeof(struct tsb));
tag = (v >> 22UL) | (ctx << 48UL);
tsb_flush(ent, tag);
}
}
@ -84,6 +88,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
tte = (_PAGE_VALID | _PAGE_L | _PAGE_CP |
_PAGE_CV | _PAGE_P | _PAGE_W);
tsb_paddr = __pa(mm->context.tsb);
BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
/* Use the smallest page size that can map the whole TSB
* in one TLB entry.
@ -144,13 +149,23 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
BUG();
};
tsb_reg |= base;
tsb_reg |= (tsb_paddr & (page_sz - 1UL));
tte |= (tsb_paddr & ~(page_sz - 1UL));
if (tlb_type == cheetah_plus) {
/* Physical mapping, no locked TLB entry for TSB. */
tsb_reg |= tsb_paddr;
mm->context.tsb_reg_val = tsb_reg;
mm->context.tsb_map_vaddr = 0;
mm->context.tsb_map_pte = 0;
} else {
tsb_reg |= base;
tsb_reg |= (tsb_paddr & (page_sz - 1UL));
tte |= (tsb_paddr & ~(page_sz - 1UL));
mm->context.tsb_reg_val = tsb_reg;
mm->context.tsb_map_vaddr = base;
mm->context.tsb_map_pte = tte;
}
mm->context.tsb_reg_val = tsb_reg;
mm->context.tsb_map_vaddr = base;
mm->context.tsb_map_pte = tte;
}
/* The page tables are locked against modifications while this
@ -168,13 +183,21 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
for (i = 0; i < old_nentries; i++) {
register unsigned long tag asm("o4");
register unsigned long pte asm("o5");
unsigned long v;
unsigned int hash;
unsigned long v, hash;
__asm__ __volatile__(
"ldda [%2] %3, %0"
: "=r" (tag), "=r" (pte)
: "r" (&old_tsb[i]), "i" (ASI_NUCLEUS_QUAD_LDD));
if (tlb_type == cheetah_plus) {
__asm__ __volatile__(
"ldda [%2] %3, %0"
: "=r" (tag), "=r" (pte)
: "r" (__pa(&old_tsb[i])),
"i" (ASI_QUAD_LDD_PHYS));
} else {
__asm__ __volatile__(
"ldda [%2] %3, %0"
: "=r" (tag), "=r" (pte)
: "r" (&old_tsb[i]),
"i" (ASI_NUCLEUS_QUAD_LDD));
}
if (!tag || (tag & (1UL << TSB_TAG_LOCK_BIT)))
continue;
@ -198,8 +221,20 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
v |= (i & (512UL - 1UL)) << 13UL;
hash = tsb_hash(v, new_nentries);
new_tsb[hash].tag = tag;
new_tsb[hash].pte = pte;
if (tlb_type == cheetah_plus) {
__asm__ __volatile__(
"stxa %0, [%1] %2\n\t"
"stxa %3, [%4] %2"
: /* no outputs */
: "r" (tag),
"r" (__pa(&new_tsb[hash].tag)),
"i" (ASI_PHYS_USE_EC),
"r" (pte),
"r" (__pa(&new_tsb[hash].pte)));
} else {
new_tsb[hash].tag = tag;
new_tsb[hash].pte = pte;
}
}
}

View file

@ -97,7 +97,8 @@ struct tsb {
unsigned long pte;
} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
extern void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte);
extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte);
extern void tsb_flush(unsigned long ent, unsigned long tag);
typedef struct {
unsigned long sparc64_ctx_val;

View file

@ -44,7 +44,89 @@
#define TSB_MEMBAR membar #StoreStore
/* Some cpus support physical address quad loads. We want to use
* those if possible so we don't need to hard-lock the TSB mapping
* into the TLB. We encode some instruction patching in order to
* support this.
*
* The kernel TSB is locked into the TLB by virtue of being in the
* kernel image, so we don't play these games for swapper_tsb access.
*/
#ifndef __ASSEMBLY__
struct tsb_phys_patch_entry {
unsigned int addr;
unsigned int insn;
};
extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
#endif
#define TSB_LOAD_QUAD(TSB, REG) \
661: ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \
.section .tsb_phys_patch, "ax"; \
.word 661b; \
ldda [TSB] ASI_QUAD_LDD_PHYS, REG; \
.previous
#define TSB_LOAD_TAG_HIGH(TSB, REG) \
661: lduwa [TSB] ASI_N, REG; \
.section .tsb_phys_patch, "ax"; \
.word 661b; \
lduwa [TSB] ASI_PHYS_USE_EC, REG; \
.previous
#define TSB_LOAD_TAG(TSB, REG) \
661: ldxa [TSB] ASI_N, REG; \
.section .tsb_phys_patch, "ax"; \
.word 661b; \
ldxa [TSB] ASI_PHYS_USE_EC, REG; \
.previous
#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \
661: casa [TSB] ASI_N, REG1, REG2; \
.section .tsb_phys_patch, "ax"; \
.word 661b; \
casa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
.previous
#define TSB_CAS_TAG(TSB, REG1, REG2) \
661: casxa [TSB] ASI_N, REG1, REG2; \
.section .tsb_phys_patch, "ax"; \
.word 661b; \
casxa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
.previous
#define TSB_STORE(ADDR, VAL) \
661: stxa VAL, [ADDR] ASI_N; \
.section .tsb_phys_patch, "ax"; \
.word 661b; \
stxa VAL, [ADDR] ASI_PHYS_USE_EC; \
.previous
#define TSB_LOCK_TAG(TSB, REG1, REG2) \
99: TSB_LOAD_TAG_HIGH(TSB, REG1); \
sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\
andcc REG1, REG2, %g0; \
bne,pn %icc, 99b; \
nop; \
TSB_CAS_TAG_HIGH(TSB, REG1, REG2); \
cmp REG1, REG2; \
bne,pn %icc, 99b; \
nop; \
TSB_MEMBAR
#define TSB_WRITE(TSB, TTE, TAG) \
add TSB, 0x8, TSB; \
TSB_STORE(TSB, TTE); \
sub TSB, 0x8, TSB; \
TSB_MEMBAR; \
TSB_STORE(TSB, TAG);
#define KTSB_LOAD_QUAD(TSB, REG) \
ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG;
#define KTSB_STORE(ADDR, VAL) \
stxa VAL, [ADDR] ASI_N;
#define KTSB_LOCK_TAG(TSB, REG1, REG2) \
99: lduwa [TSB] ASI_N, REG1; \
sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\
andcc REG1, REG2, %g0; \
@ -56,10 +138,12 @@
nop; \
TSB_MEMBAR
#define TSB_WRITE(TSB, TTE, TAG) \
stx TTE, [TSB + 0x08]; \
TSB_MEMBAR; \
stx TAG, [TSB + 0x00];
#define KTSB_WRITE(TSB, TTE, TAG) \
add TSB, 0x8, TSB; \
stxa TTE, [TSB] ASI_N; \
sub TSB, 0x8, TSB; \
TSB_MEMBAR; \
stxa TAG, [TSB] ASI_N;
/* Do a kernel page table walk. Leaves physical PTE pointer in
* REG1. Jumps to FAIL_LABEL on early page table walk termination.
@ -157,7 +241,7 @@
and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
sllx REG2, 4, REG2; \
add REG1, REG2, REG2; \
ldda [REG2] ASI_NUCLEUS_QUAD_LDD, REG3; \
KTSB_LOAD_QUAD(REG2, REG3); \
cmp REG3, TAG; \
be,a,pt %xcc, OK_LABEL; \
mov REG4, REG1;