mirror of
https://github.com/adulau/aha.git
synced 2024-12-28 11:46:19 +00:00
mm: lockless pagecache
Combine page_cache_get_speculative with lockless radix tree lookups to introduce lockless page cache lookups (i.e. no mapping->tree_lock on the read side). The only atomicity change this introduces is that the gang pagecache lookup functions now behave as if they are implemented with multiple find_get_page calls, rather than operating on a snapshot of the pages. In practice, this atomicity guarantee is not used anyway, and the gang lookups are meant to replace individual lookups, so these semantics are natural. Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Hugh Dickins <hugh@veritas.com> Cc: "Paul E. McKenney" <paulmck@us.ibm.com> Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
e286781d5f
commit
a60637c858
1 changed file with 134 additions and 45 deletions
179
mm/filemap.c
179
mm/filemap.c
|
@ -637,15 +637,35 @@ void __lock_page_nosync(struct page *page)
|
||||||
* Is there a pagecache struct page at the given (mapping, offset) tuple?
|
* Is there a pagecache struct page at the given (mapping, offset) tuple?
|
||||||
* If yes, increment its refcount and return it; if no, return NULL.
|
* If yes, increment its refcount and return it; if no, return NULL.
|
||||||
*/
|
*/
|
||||||
struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
|
struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
|
||||||
{
|
{
|
||||||
|
void **pagep;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
read_lock_irq(&mapping->tree_lock);
|
rcu_read_lock();
|
||||||
page = radix_tree_lookup(&mapping->page_tree, offset);
|
repeat:
|
||||||
if (page)
|
page = NULL;
|
||||||
page_cache_get(page);
|
pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
|
||||||
read_unlock_irq(&mapping->tree_lock);
|
if (pagep) {
|
||||||
|
page = radix_tree_deref_slot(pagep);
|
||||||
|
if (unlikely(!page || page == RADIX_TREE_RETRY))
|
||||||
|
goto repeat;
|
||||||
|
|
||||||
|
if (!page_cache_get_speculative(page))
|
||||||
|
goto repeat;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Has the page moved?
|
||||||
|
* This is part of the lockless pagecache protocol. See
|
||||||
|
* include/linux/pagemap.h for details.
|
||||||
|
*/
|
||||||
|
if (unlikely(page != *pagep)) {
|
||||||
|
page_cache_release(page);
|
||||||
|
goto repeat;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(find_get_page);
|
EXPORT_SYMBOL(find_get_page);
|
||||||
|
@ -660,32 +680,22 @@ EXPORT_SYMBOL(find_get_page);
|
||||||
*
|
*
|
||||||
* Returns zero if the page was not present. find_lock_page() may sleep.
|
* Returns zero if the page was not present. find_lock_page() may sleep.
|
||||||
*/
|
*/
|
||||||
struct page *find_lock_page(struct address_space *mapping,
|
struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
|
||||||
pgoff_t offset)
|
|
||||||
{
|
{
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
repeat:
|
repeat:
|
||||||
read_lock_irq(&mapping->tree_lock);
|
page = find_get_page(mapping, offset);
|
||||||
page = radix_tree_lookup(&mapping->page_tree, offset);
|
|
||||||
if (page) {
|
if (page) {
|
||||||
page_cache_get(page);
|
lock_page(page);
|
||||||
if (TestSetPageLocked(page)) {
|
/* Has the page been truncated? */
|
||||||
read_unlock_irq(&mapping->tree_lock);
|
if (unlikely(page->mapping != mapping)) {
|
||||||
__lock_page(page);
|
unlock_page(page);
|
||||||
|
page_cache_release(page);
|
||||||
/* Has the page been truncated while we slept? */
|
goto repeat;
|
||||||
if (unlikely(page->mapping != mapping)) {
|
|
||||||
unlock_page(page);
|
|
||||||
page_cache_release(page);
|
|
||||||
goto repeat;
|
|
||||||
}
|
|
||||||
VM_BUG_ON(page->index != offset);
|
|
||||||
goto out;
|
|
||||||
}
|
}
|
||||||
|
VM_BUG_ON(page->index != offset);
|
||||||
}
|
}
|
||||||
read_unlock_irq(&mapping->tree_lock);
|
|
||||||
out:
|
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(find_lock_page);
|
EXPORT_SYMBOL(find_lock_page);
|
||||||
|
@ -751,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
|
||||||
{
|
{
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
unsigned int ret;
|
unsigned int ret;
|
||||||
|
unsigned int nr_found;
|
||||||
|
|
||||||
read_lock_irq(&mapping->tree_lock);
|
rcu_read_lock();
|
||||||
ret = radix_tree_gang_lookup(&mapping->page_tree,
|
restart:
|
||||||
(void **)pages, start, nr_pages);
|
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
|
||||||
for (i = 0; i < ret; i++)
|
(void ***)pages, start, nr_pages);
|
||||||
page_cache_get(pages[i]);
|
ret = 0;
|
||||||
read_unlock_irq(&mapping->tree_lock);
|
for (i = 0; i < nr_found; i++) {
|
||||||
|
struct page *page;
|
||||||
|
repeat:
|
||||||
|
page = radix_tree_deref_slot((void **)pages[i]);
|
||||||
|
if (unlikely(!page))
|
||||||
|
continue;
|
||||||
|
/*
|
||||||
|
* this can only trigger if nr_found == 1, making livelock
|
||||||
|
* a non issue.
|
||||||
|
*/
|
||||||
|
if (unlikely(page == RADIX_TREE_RETRY))
|
||||||
|
goto restart;
|
||||||
|
|
||||||
|
if (!page_cache_get_speculative(page))
|
||||||
|
goto repeat;
|
||||||
|
|
||||||
|
/* Has the page moved? */
|
||||||
|
if (unlikely(page != *((void **)pages[i]))) {
|
||||||
|
page_cache_release(page);
|
||||||
|
goto repeat;
|
||||||
|
}
|
||||||
|
|
||||||
|
pages[ret] = page;
|
||||||
|
ret++;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -778,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
|
||||||
{
|
{
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
unsigned int ret;
|
unsigned int ret;
|
||||||
|
unsigned int nr_found;
|
||||||
|
|
||||||
read_lock_irq(&mapping->tree_lock);
|
rcu_read_lock();
|
||||||
ret = radix_tree_gang_lookup(&mapping->page_tree,
|
restart:
|
||||||
(void **)pages, index, nr_pages);
|
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
|
||||||
for (i = 0; i < ret; i++) {
|
(void ***)pages, index, nr_pages);
|
||||||
if (pages[i]->mapping == NULL || pages[i]->index != index)
|
ret = 0;
|
||||||
|
for (i = 0; i < nr_found; i++) {
|
||||||
|
struct page *page;
|
||||||
|
repeat:
|
||||||
|
page = radix_tree_deref_slot((void **)pages[i]);
|
||||||
|
if (unlikely(!page))
|
||||||
|
continue;
|
||||||
|
/*
|
||||||
|
* this can only trigger if nr_found == 1, making livelock
|
||||||
|
* a non issue.
|
||||||
|
*/
|
||||||
|
if (unlikely(page == RADIX_TREE_RETRY))
|
||||||
|
goto restart;
|
||||||
|
|
||||||
|
if (page->mapping == NULL || page->index != index)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
page_cache_get(pages[i]);
|
if (!page_cache_get_speculative(page))
|
||||||
|
goto repeat;
|
||||||
|
|
||||||
|
/* Has the page moved? */
|
||||||
|
if (unlikely(page != *((void **)pages[i]))) {
|
||||||
|
page_cache_release(page);
|
||||||
|
goto repeat;
|
||||||
|
}
|
||||||
|
|
||||||
|
pages[ret] = page;
|
||||||
|
ret++;
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
read_unlock_irq(&mapping->tree_lock);
|
rcu_read_unlock();
|
||||||
return i;
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(find_get_pages_contig);
|
EXPORT_SYMBOL(find_get_pages_contig);
|
||||||
|
|
||||||
|
@ -810,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
|
||||||
{
|
{
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
unsigned int ret;
|
unsigned int ret;
|
||||||
|
unsigned int nr_found;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
restart:
|
||||||
|
nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
|
||||||
|
(void ***)pages, *index, nr_pages, tag);
|
||||||
|
ret = 0;
|
||||||
|
for (i = 0; i < nr_found; i++) {
|
||||||
|
struct page *page;
|
||||||
|
repeat:
|
||||||
|
page = radix_tree_deref_slot((void **)pages[i]);
|
||||||
|
if (unlikely(!page))
|
||||||
|
continue;
|
||||||
|
/*
|
||||||
|
* this can only trigger if nr_found == 1, making livelock
|
||||||
|
* a non issue.
|
||||||
|
*/
|
||||||
|
if (unlikely(page == RADIX_TREE_RETRY))
|
||||||
|
goto restart;
|
||||||
|
|
||||||
|
if (!page_cache_get_speculative(page))
|
||||||
|
goto repeat;
|
||||||
|
|
||||||
|
/* Has the page moved? */
|
||||||
|
if (unlikely(page != *((void **)pages[i]))) {
|
||||||
|
page_cache_release(page);
|
||||||
|
goto repeat;
|
||||||
|
}
|
||||||
|
|
||||||
|
pages[ret] = page;
|
||||||
|
ret++;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
read_lock_irq(&mapping->tree_lock);
|
|
||||||
ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
|
|
||||||
(void **)pages, *index, nr_pages, tag);
|
|
||||||
for (i = 0; i < ret; i++)
|
|
||||||
page_cache_get(pages[i]);
|
|
||||||
if (ret)
|
if (ret)
|
||||||
*index = pages[ret - 1]->index + 1;
|
*index = pages[ret - 1]->index + 1;
|
||||||
read_unlock_irq(&mapping->tree_lock);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(find_get_pages_tag);
|
EXPORT_SYMBOL(find_get_pages_tag);
|
||||||
|
|
Loading…
Reference in a new issue