mirror of
https://github.com/adulau/aha.git
synced 2025-01-03 22:53:18 +00:00
[PATCH] mm: unmap_vmas with inner ptlock
Remove the page_table_lock from around the calls to unmap_vmas, and replace the pte_offset_map in zap_pte_range by pte_offset_map_lock: all callers are now safe to descend without page_table_lock.

Don't attempt fancy locking for hugepages, just take page_table_lock in unmap_hugepage_range. Which makes zap_hugepage_range, and the hugetlb test in zap_page_range, redundant: unmap_vmas calls unmap_hugepage_range anyway. Nor does unmap_vmas have much use for its mm arg now.

The tlb_start_vma and tlb_end_vma in unmap_page_range are now called without page_table_lock: if they're implemented at all, they typically come down to flush_cache_range (usually done outside page_table_lock) and flush_tlb_range (which we already audited for the mprotect case).

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
8f4f8c164c
commit
508034a32b
6 changed files with 21 additions and 54 deletions
|
@ -92,7 +92,7 @@ out:
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Called under down_write(mmap_sem), page_table_lock is not held
|
* Called under down_write(mmap_sem).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
|
#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
|
||||||
|
@ -308,7 +308,6 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
|
||||||
|
|
||||||
vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
|
vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
|
||||||
unsigned long h_vm_pgoff;
|
unsigned long h_vm_pgoff;
|
||||||
unsigned long v_length;
|
|
||||||
unsigned long v_offset;
|
unsigned long v_offset;
|
||||||
|
|
||||||
h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
|
h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
|
||||||
|
@ -319,11 +318,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
|
||||||
if (h_vm_pgoff >= h_pgoff)
|
if (h_vm_pgoff >= h_pgoff)
|
||||||
v_offset = 0;
|
v_offset = 0;
|
||||||
|
|
||||||
v_length = vma->vm_end - vma->vm_start;
|
unmap_hugepage_range(vma,
|
||||||
|
vma->vm_start + v_offset, vma->vm_end);
|
||||||
zap_hugepage_range(vma,
|
|
||||||
vma->vm_start + v_offset,
|
|
||||||
v_length - v_offset);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,6 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
|
||||||
int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
|
int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
|
||||||
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
|
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
|
||||||
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
|
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
|
||||||
void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
|
|
||||||
void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
|
void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
|
||||||
int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
|
int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
|
||||||
int hugetlb_report_meminfo(char *);
|
int hugetlb_report_meminfo(char *);
|
||||||
|
@ -87,7 +86,6 @@ static inline unsigned long hugetlb_total_pages(void)
|
||||||
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
|
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
|
||||||
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
|
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
|
||||||
#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
|
#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
|
||||||
#define zap_hugepage_range(vma, start, len) BUG()
|
|
||||||
#define unmap_hugepage_range(vma, start, end) BUG()
|
#define unmap_hugepage_range(vma, start, end) BUG()
|
||||||
#define is_hugepage_mem_enough(size) 0
|
#define is_hugepage_mem_enough(size) 0
|
||||||
#define hugetlb_report_meminfo(buf) 0
|
#define hugetlb_report_meminfo(buf) 0
|
||||||
|
|
|
@ -682,7 +682,7 @@ struct zap_details {
|
||||||
|
|
||||||
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
|
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
|
||||||
unsigned long size, struct zap_details *);
|
unsigned long size, struct zap_details *);
|
||||||
unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
|
unsigned long unmap_vmas(struct mmu_gather **tlb,
|
||||||
struct vm_area_struct *start_vma, unsigned long start_addr,
|
struct vm_area_struct *start_vma, unsigned long start_addr,
|
||||||
unsigned long end_addr, unsigned long *nr_accounted,
|
unsigned long end_addr, unsigned long *nr_accounted,
|
||||||
struct zap_details *);
|
struct zap_details *);
|
||||||
|
|
12
mm/hugetlb.c
12
mm/hugetlb.c
|
@ -314,6 +314,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
|
||||||
BUG_ON(start & ~HPAGE_MASK);
|
BUG_ON(start & ~HPAGE_MASK);
|
||||||
BUG_ON(end & ~HPAGE_MASK);
|
BUG_ON(end & ~HPAGE_MASK);
|
||||||
|
|
||||||
|
spin_lock(&mm->page_table_lock);
|
||||||
|
|
||||||
/* Update high watermark before we lower rss */
|
/* Update high watermark before we lower rss */
|
||||||
update_hiwater_rss(mm);
|
update_hiwater_rss(mm);
|
||||||
|
|
||||||
|
@ -333,17 +335,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
|
||||||
put_page(page);
|
put_page(page);
|
||||||
add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
|
add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
|
||||||
}
|
}
|
||||||
flush_tlb_range(vma, start, end);
|
|
||||||
}
|
|
||||||
|
|
||||||
void zap_hugepage_range(struct vm_area_struct *vma,
|
|
||||||
unsigned long start, unsigned long length)
|
|
||||||
{
|
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
|
||||||
|
|
||||||
spin_lock(&mm->page_table_lock);
|
|
||||||
unmap_hugepage_range(vma, start, start + length);
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
spin_unlock(&mm->page_table_lock);
|
||||||
|
flush_tlb_range(vma, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
|
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
|
||||||
|
|
39
mm/memory.c
39
mm/memory.c
|
@ -551,10 +551,11 @@ static void zap_pte_range(struct mmu_gather *tlb,
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = tlb->mm;
|
struct mm_struct *mm = tlb->mm;
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
|
spinlock_t *ptl;
|
||||||
int file_rss = 0;
|
int file_rss = 0;
|
||||||
int anon_rss = 0;
|
int anon_rss = 0;
|
||||||
|
|
||||||
pte = pte_offset_map(pmd, addr);
|
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||||
do {
|
do {
|
||||||
pte_t ptent = *pte;
|
pte_t ptent = *pte;
|
||||||
if (pte_none(ptent))
|
if (pte_none(ptent))
|
||||||
|
@ -621,7 +622,7 @@ static void zap_pte_range(struct mmu_gather *tlb,
|
||||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||||
|
|
||||||
add_mm_rss(mm, file_rss, anon_rss);
|
add_mm_rss(mm, file_rss, anon_rss);
|
||||||
pte_unmap(pte - 1);
|
pte_unmap_unlock(pte - 1, ptl);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void zap_pmd_range(struct mmu_gather *tlb,
|
static inline void zap_pmd_range(struct mmu_gather *tlb,
|
||||||
|
@ -690,7 +691,6 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||||
/**
|
/**
|
||||||
* unmap_vmas - unmap a range of memory covered by a list of vma's
|
* unmap_vmas - unmap a range of memory covered by a list of vma's
|
||||||
* @tlbp: address of the caller's struct mmu_gather
|
* @tlbp: address of the caller's struct mmu_gather
|
||||||
* @mm: the controlling mm_struct
|
|
||||||
* @vma: the starting vma
|
* @vma: the starting vma
|
||||||
* @start_addr: virtual address at which to start unmapping
|
* @start_addr: virtual address at which to start unmapping
|
||||||
* @end_addr: virtual address at which to end unmapping
|
* @end_addr: virtual address at which to end unmapping
|
||||||
|
@ -699,10 +699,10 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||||
*
|
*
|
||||||
* Returns the end address of the unmapping (restart addr if interrupted).
|
* Returns the end address of the unmapping (restart addr if interrupted).
|
||||||
*
|
*
|
||||||
* Unmap all pages in the vma list. Called under page_table_lock.
|
* Unmap all pages in the vma list.
|
||||||
*
|
*
|
||||||
* We aim to not hold page_table_lock for too long (for scheduling latency
|
* We aim to not hold locks for too long (for scheduling latency reasons).
|
||||||
* reasons). So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
|
* So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
|
||||||
* return the ending mmu_gather to the caller.
|
* return the ending mmu_gather to the caller.
|
||||||
*
|
*
|
||||||
* Only addresses between `start' and `end' will be unmapped.
|
* Only addresses between `start' and `end' will be unmapped.
|
||||||
|
@ -714,7 +714,7 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||||
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
|
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
|
||||||
* drops the lock and schedules.
|
* drops the lock and schedules.
|
||||||
*/
|
*/
|
||||||
unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
|
unsigned long unmap_vmas(struct mmu_gather **tlbp,
|
||||||
struct vm_area_struct *vma, unsigned long start_addr,
|
struct vm_area_struct *vma, unsigned long start_addr,
|
||||||
unsigned long end_addr, unsigned long *nr_accounted,
|
unsigned long end_addr, unsigned long *nr_accounted,
|
||||||
struct zap_details *details)
|
struct zap_details *details)
|
||||||
|
@ -764,19 +764,15 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
|
||||||
tlb_finish_mmu(*tlbp, tlb_start, start);
|
tlb_finish_mmu(*tlbp, tlb_start, start);
|
||||||
|
|
||||||
if (need_resched() ||
|
if (need_resched() ||
|
||||||
need_lockbreak(&mm->page_table_lock) ||
|
|
||||||
(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
|
(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
|
||||||
if (i_mmap_lock) {
|
if (i_mmap_lock) {
|
||||||
/* must reset count of rss freed */
|
*tlbp = NULL;
|
||||||
*tlbp = tlb_gather_mmu(mm, fullmm);
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
spin_lock(&mm->page_table_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
*tlbp = tlb_gather_mmu(mm, fullmm);
|
*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
|
||||||
tlb_start_valid = 0;
|
tlb_start_valid = 0;
|
||||||
zap_bytes = ZAP_BLOCK_SIZE;
|
zap_bytes = ZAP_BLOCK_SIZE;
|
||||||
}
|
}
|
||||||
|
@ -800,17 +796,11 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
|
||||||
unsigned long end = address + size;
|
unsigned long end = address + size;
|
||||||
unsigned long nr_accounted = 0;
|
unsigned long nr_accounted = 0;
|
||||||
|
|
||||||
if (is_vm_hugetlb_page(vma)) {
|
|
||||||
zap_hugepage_range(vma, address, size);
|
|
||||||
return end;
|
|
||||||
}
|
|
||||||
|
|
||||||
lru_add_drain();
|
lru_add_drain();
|
||||||
tlb = tlb_gather_mmu(mm, 0);
|
tlb = tlb_gather_mmu(mm, 0);
|
||||||
update_hiwater_rss(mm);
|
update_hiwater_rss(mm);
|
||||||
spin_lock(&mm->page_table_lock);
|
end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
|
||||||
end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
|
if (tlb)
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
tlb_finish_mmu(tlb, address, end);
|
tlb_finish_mmu(tlb, address, end);
|
||||||
return end;
|
return end;
|
||||||
}
|
}
|
||||||
|
@ -1434,13 +1424,6 @@ again:
|
||||||
|
|
||||||
restart_addr = zap_page_range(vma, start_addr,
|
restart_addr = zap_page_range(vma, start_addr,
|
||||||
end_addr - start_addr, details);
|
end_addr - start_addr, details);
|
||||||
|
|
||||||
/*
|
|
||||||
* We cannot rely on the break test in unmap_vmas:
|
|
||||||
* on the one hand, we don't want to restart our loop
|
|
||||||
* just because that broke out for the page_table_lock;
|
|
||||||
* on the other hand, it does no test when vma is small.
|
|
||||||
*/
|
|
||||||
need_break = need_resched() ||
|
need_break = need_resched() ||
|
||||||
need_lockbreak(details->i_mmap_lock);
|
need_lockbreak(details->i_mmap_lock);
|
||||||
|
|
||||||
|
|
|
@ -1673,9 +1673,7 @@ static void unmap_region(struct mm_struct *mm,
|
||||||
lru_add_drain();
|
lru_add_drain();
|
||||||
tlb = tlb_gather_mmu(mm, 0);
|
tlb = tlb_gather_mmu(mm, 0);
|
||||||
update_hiwater_rss(mm);
|
update_hiwater_rss(mm);
|
||||||
spin_lock(&mm->page_table_lock);
|
unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
|
||||||
unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
vm_unacct_memory(nr_accounted);
|
vm_unacct_memory(nr_accounted);
|
||||||
free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
|
free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
|
||||||
next? next->vm_start: 0);
|
next? next->vm_start: 0);
|
||||||
|
@ -1958,9 +1956,7 @@ void exit_mmap(struct mm_struct *mm)
|
||||||
tlb = tlb_gather_mmu(mm, 1);
|
tlb = tlb_gather_mmu(mm, 1);
|
||||||
/* Don't update_hiwater_rss(mm) here, do_exit already did */
|
/* Don't update_hiwater_rss(mm) here, do_exit already did */
|
||||||
/* Use -1 here to ensure all VMAs in the mm are unmapped */
|
/* Use -1 here to ensure all VMAs in the mm are unmapped */
|
||||||
spin_lock(&mm->page_table_lock);
|
end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
|
||||||
end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
vm_unacct_memory(nr_accounted);
|
vm_unacct_memory(nr_accounted);
|
||||||
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
|
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
|
||||||
tlb_finish_mmu(tlb, 0, end);
|
tlb_finish_mmu(tlb, 0, end);
|
||||||
|
|
Loading…
Reference in a new issue