[linux-mm-cc] [PATCH 09/12] avoid OOM : shrink ccache and eat-ccache-allocator

IKEDA Munehiro m-ikeda at ds.jp.nec.com
Fri Jul 20 06:45:26 EDT 2007


If the eat-ccache-allocator fails to allocate memory,
it frees the oldest fs_backed chunk_head(s) and retries.
If the allocation fails while the chunk_head is locked,
it releases the lock once, schedules, re-acquires the lock,
frees chunk_head(s), and retries the allocation, repeating
as needed.
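
To illustrate, the core retry loop looks roughly like the sketch
below (condensed from alloc_or_eat_ccache() in the diff; the
_lock variant additionally drops and re-takes the chunk_head
lock around schedule()).  This is only a simplified sketch, not
the exact code in the patch:

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/sched.h>

static int free_ccache_pages(int pages);	/* shrink helper added by this patch */

static struct page *alloc_or_eat_ccache_sketch(gfp_t gfp_mask, int order)
{
	struct page *page;
	/* __GFP_NOFAIL is emulated by the retry loop below */
	gfp_t mask = (gfp_mask & ~__GFP_NOFAIL) | __GFP_NOWARN;

	page = alloc_pages(mask, order);
	if (page)
		return page;

	for (;;) {
		/* shrink ccache itself, then retry the allocation */
		if (free_ccache_pages(1 << order) >= 0) {
			page = alloc_pages(mask, order);
			if (page)
				return page;
		}
		if (!(gfp_mask & __GFP_NOFAIL))
			return NULL;	/* caller tolerates failure */
		schedule();		/* give reclaim a chance, then retry */
	}
}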


Signed-off-by: IKEDA, Munehiro <m-ikeda at ds.jp.nec.com>
---
 mm/ccache.c |  208 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 188 insertions(+), 20 deletions(-)

diff --git a/mm/ccache.c b/mm/ccache.c
index 316e686..790027a 100644
--- a/mm/ccache.c
+++ b/mm/ccache.c
@@ -325,6 +325,76 @@ static int free_chunk_head(struct chunk_head *ch)
 	return freed;
 }
 
+static int shrink_anon_ccache(void)
+{
+	/* temporarily do nothing */
+	return 0;
+}
+
+static int shrink_fs_backed_ccache(void)
+{
+	struct chunk_head *ch;
+	struct list_head *p;
+	int freed;
+
+	spin_lock(&ccache_lock);
+	if (list_empty(&lru_fs_backed)){
+		CC_DEBUG("no fs_backed_ccache");
+		spin_unlock(&ccache_lock);
+		return 0;
+	}
+
+	list_for_each_prev(p, &lru_fs_backed) {
+		ch = list_entry(p, struct chunk_head, lru);
+		if (bit_spin_trylock(PG_locked, &ch->flags))
+			break;
+	}
+	if (p == &lru_fs_backed){
+		CC_DEBUG("all pages locked");
+		spin_unlock(&ccache_lock);
+		return -EAGAIN;
+	}
+		
+	list_del_init(&ch->lru);
+	spin_unlock(&ccache_lock);
+
+	write_lock_irq(&ch->mapping->tree_lock);
+	if (radix_tree_delete(&ch->mapping->page_tree, ch->offset))
+		ch->mapping->nrpages--;
+	else
+		CC_DEBUG("try to delete from radix tree, but no slot");
+	write_unlock_irq(&ch->mapping->tree_lock);
+	
+	freed = __free_chunk_head(ch);
+	bit_spin_unlock(PG_locked, &ch->flags);
+	release_chunk_head(ch);
+	atomic_dec(&fs_backed_cc_size);
+
+	return freed;
+}
+
+/*
+ * Try to shrink ccache
+ */
+static int shrink_ccache(int goal)
+{
+	int reclaim = 0;
+	int ret;
+
+	while (reclaim < goal){
+		ret = shrink_fs_backed_ccache();
+		if (ret < 0)
+			return ret;
+		reclaim += ret;
+	
+		ret = shrink_anon_ccache();
+		if (ret < 0)
+			return ret;
+		reclaim += ret;
+	}
+	return reclaim;
+}
+
 /*
  * Get a no. of chunks from free list for 'total_size'.
  * Allocate more chunks if required.
@@ -333,6 +403,7 @@ static int free_chunk_head(struct chunk_head *ch)
 static struct chunk_head* get_enough_chunks(unsigned int total_size)
 {
 	int ret = -ENOMEM, rem = total_size;
+	unsigned long cc_size;
 	struct chunk *chunk, *tail = NULL;
 	/* take this from slab */
 	struct chunk_head *ch = kmalloc(sizeof(struct chunk_head), GFP_KERNEL);
@@ -376,10 +447,20 @@ repeat:
 	spin_unlock(&ccache_lock);
 
 	/* Free list didn't have enough chunks. Get more! */
-	ret = expand_ccache();
-	if (ret)
-		goto out;
-
+	cc_size = (unsigned long)(atomic_read(&anon_cc_size) +
+				  atomic_read(&fs_backed_cc_size));
+	if (cc_size >= max_anon_cc_size + max_fs_backed_cc_size){
+		if ((ret = shrink_ccache(rem))<rem){
+			CC_DEBUG("shrink_ccache(%d) returned %d", rem, ret);
+			goto out;
+		}
+	}
+	else{
+		if ((ret = expand_ccache())<0){
+			CC_DEBUG("expand_ccache() returned %d", ret);
+			goto out;
+		}
+	}
 	goto repeat;
 out:
 	CC_INFO("function failed!!!");
@@ -616,6 +697,91 @@ int should_add_to_ccache(struct page *page)
 	return 0;
 }
 
+static int free_ccache_pages(int pages)
+{
+	unsigned int oldp, bytes;
+	int ret;
+	
+	oldp = atomic_read(&cc_pages);
+	bytes = PAGE_SIZE * pages;
+	do {
+		ret = shrink_ccache(bytes);
+		if (ret < 0)
+			break;
+		ret = oldp - atomic_read(&cc_pages);
+	} while (ret < pages);
+
+	return ret;
+}
+
+/*
+ * Try to alloc a page or eat ccache itself if fail
+ */
+static struct page *alloc_or_eat_ccache(gfp_t gfp_mask, int order)
+{
+	struct page *page;
+	gfp_t mask;
+
+	/* __GFP_NOFAIL is handled in this function */
+	mask = (gfp_mask&~__GFP_NOFAIL)| __GFP_NOWARN;
+	page = alloc_pages(mask, order);
+	if (page)
+		return page;
+
+	for (;;){
+		if (free_ccache_pages(1 << order) >= 0) {
+			page = alloc_pages(mask, order);
+			if (page)
+				return page;
+		}
+		if (!(gfp_mask & __GFP_NOFAIL))
+			return NULL;
+		schedule();
+	}
+}
+
+/*
+ * Try to alloc a page or eat ccache itself with lock.
+ * If the allocation fails, release the lock, schedule,
+ * re-acquire the lock, and retry.
+ * 
+ */
+static struct page *alloc_or_eat_ccache_lock(gfp_t gfp_mask,
+					int order, struct chunk_head *ch)
+{
+	struct page *page;
+	gfp_t mask;
+
+	/* __GFP_NOFAIL is handled in this function */
+	mask = (gfp_mask & ~__GFP_NOFAIL)| (GFP_NOWAIT|__GFP_NOWARN);
+
+	page = alloc_pages(mask, order);
+	if (page)
+		return page;
+
+	for (;;){
+		if (free_ccache_pages(1 << order) >= 0) {
+			page = alloc_pages(mask, order);
+			if (page)
+				return page;
+		}
+
+		/* can't allocate ... schedule and retry later */
+		bit_spin_unlock(PG_locked, &ch->flags);
+		if (!(gfp_mask & __GFP_NOFAIL))
+			break;
+		schedule();
+		if (!bit_spin_trylock(PG_locked, &ch->flags))
+			break;
+	}
+
+	/*
+	 * Someone else locked the chunk and is already decompressing.
+	 * Here, no page is allocated and no lock is held.
+	 */
+	return NULL;
+}
+
 /*
  * given chunk_head, gather the chunks into a page,
  * decompress it, and return resulting page.
@@ -624,7 +790,8 @@ static struct page *cc_readpage(struct chunk_head *ch)
 {
 	int ret = -ENOMEM, algo_idx;
 	unsigned int comp_size=0;
-	struct page *decomp_page, *comp_page;
+	struct page *comp_page = NULL;
+	struct page *decomp_page = NULL;
 	void *comp_data;
 	struct chunk *chunk, *tmp;
 	CC_DEBUG2("start");
@@ -640,23 +807,23 @@ static struct page *cc_readpage(struct chunk_head *ch)
 	 * -- Doing GFP_KERNEL giver higher chances that alloc will
 	 * be successfull but it may sleep (and hence doesn't work)!
 	 * -- What to do??
+	 *
+	 * --> alloc_or_eat_ccache_lock() is the proposed solution
 	 */
-	comp_page = alloc_page(GFP_ATOMIC);
+	comp_page = alloc_or_eat_ccache_lock(__GFP_NOFAIL, 0, ch);
 	if (!comp_page) {
-		CC_INFO("comp_page alloc failed!!!\n");
-		BUG();
-		return NULL;
+		CC_INFO("duplicated ccache reading");
+		goto out;
 	}
 	comp_data = page_address(comp_page);
 #if 0
 	decomp_page = alloc_page(GFP_ATOMIC);
 #endif
 	/* same comments apply as for comp_page alloc */
-	decomp_page = alloc_page(GFP_ATOMIC);
+	decomp_page = alloc_or_eat_ccache_lock(__GFP_NOFAIL, 0, ch);
 	if (!decomp_page) {
-		CC_INFO("decomp_page alloc failed!!!\n");
-		BUG();	// we normally hit this after some OOM kills :)
-		return NULL;
+		CC_INFO("duplicated ccache reading");
+		goto out;
 	}
 
 	chunk = ch->chunk_list;
@@ -698,8 +865,10 @@ static struct page *cc_readpage(struct chunk_head *ch)
 	CC_DEBUG2("decomp_page->flags=0x%08lx", decomp_page->flags);
 
 	set_page_private(comp_page, 0);
-	__free_page(comp_page);
 
+out:
+	if (comp_page)
+		__free_page(comp_page);
 	return decomp_page;
 }
 
@@ -725,7 +894,8 @@ int cc_writepage(struct page *page)
 	CC_DEBUG2("mapping=%p, flags=0x%08lx, index=%lu",
 			mapping, page->flags, page->index);
 
-	tmp_page = alloc_pages(GFP_KERNEL, 1);	/* page may expand */
+	/* page may expand */
+	tmp_page = alloc_or_eat_ccache(GFP_KERNEL, 1);
 	if (!tmp_page)
 		goto out;
 	
@@ -896,11 +1066,9 @@ struct page *handle_ccache_fault(struct chunk_head *ch,
 		page = cc_readpage(ch);
 		CC_DEBUG2("after cc_readpage");
 
-		if (!page) {
-			CC_INFO("cc_readpage failed!!!");
-			bit_spin_unlock(PG_locked, &ch->flags);
-			return NULL;
-		}
+		if (!page)
+			return cleanup_dup_fault(ch, mapping);
+
 		page->mapping = mapping;
 
 		write_lock_irq(&mapping->tree_lock);
-- 
1.4.4.4




