cgroups: use css id in swap cgroup for saving memory v5

Use the CSS ID for records in swap_cgroup.  On a 64-bit machine this shrinks
each swap_cgroup record from 8 bytes to 2 bytes.

This means that, with 2GB of swap equipped (assuming a page size of 4096 bytes):

	Before: size of swap_cgroup = 2G/4k * 8 = 4MB
	After:  size of swap_cgroup = 2G/4k * 2 = 1MB
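
For illustration, a minimal sketch of the per-entry record before and after
this change (the struct names here are invented for the comparison; the real
definition is in the swap_cgroup hunk below, and the sizes assume a 64-bit
kernel):

	/* sketch only: the old record stored a pointer, the new one a CSS ID */
	struct swap_cgroup_old {
		struct mem_cgroup *val;	/* 8 bytes on 64-bit */
	};
	struct swap_cgroup_new {
		unsigned short id;	/* 2 bytes */
	};
	/*
	 * With 4096-byte pages, one page of records now covers
	 * 4096 / 2 = 2048 swap entries instead of 4096 / 8 = 512,
	 * which is where the 4MB -> 1MB figure above comes from.
	 */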

The reduction is substantial.  Of course, there are trade-offs: the CSS ID
indirection adds overhead to swap-in, swap-out, and swap-free.

But in general,
  - swap is a resource that users tend to avoid using.
  - if swap is never used, the swap_cgroup area is never used either.
  - traditional guidance sizes swap in proportion to memory, and machine
    memory sizes keep growing.

I think reducing the size of swap_cgroup makes sense.

Note:
  - The ID->CSS lookup routine takes no locks; it runs under an RCU read-side
    critical section (a rough sketch of this lookup pattern follows below).
  - A memcg can become obsolete at rmdir() but is not freed while a refcount
    from swap_cgroup is still held.
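
A sketch of that lookup pattern, for illustration only (the helper name and
the combined css_tryget() here are mine; the patch's own helper,
mem_cgroup_lookup() in the diff below, leaves css_tryget() to the callers
that need a reference, and the uncharge paths skip it on purpose because
they already hold one via mem_cgroup_get()):

	/* illustrative sketch; relies on the kernel cgroup/RCU helpers */
	static struct mem_cgroup *memcg_from_swap_id(unsigned short id)
	{
		struct cgroup_subsys_state *css;
		struct mem_cgroup *mem = NULL;

		if (!id)		/* ID 0 means "nothing recorded" */
			return NULL;

		rcu_read_lock();	/* ID->CSS lookup needs only RCU, no locks */
		css = css_lookup(&mem_cgroup_subsys, id);
		if (css && css_tryget(css))	/* fails if the memcg is being removed */
			mem = container_of(css, struct mem_cgroup, css);
		rcu_read_unlock();

		return mem;		/* caller drops the reference with css_put() */
	}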

Changelog v4->v5:
 - reworked on top of memcg-charge-swapcache-to-proper-memcg.patch
Changelog ->v4:
 - fixed the not-configured case.
 - deleted unnecessary comments.
 - fixed a NULL pointer bug.
 - fixed a message in dmesg.

[nishimura@mxp.nes.nec.co.jp: css_tryget can be called twice in !PageCgroupUsed case]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Author: KAMEZAWA Hiroyuki, 2009-04-02 16:57:45 -07:00; committed by Linus Torvalds
parent 3c776e6466
commit a3b2d69269
3 changed files with 82 additions and 37 deletions

@@ -91,24 +91,23 @@ static inline void page_cgroup_init(void)
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
#include <linux/swap.h>
extern struct mem_cgroup *
swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
extern void swap_cgroup_swapoff(int type);
#else
#include <linux/swap.h>
static inline
struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
return NULL;
return 0;
}
static inline
struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
return NULL;
return 0;
}
static inline int

@@ -991,10 +991,31 @@ nomem:
return -ENOMEM;
}
/*
* A helper function to get mem_cgroup from ID. must be called under
* rcu_read_lock(). The caller must check css_is_removed() or some if
* it's concern. (dropping refcnt from swap can be called against removed
* memcg.)
*/
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
struct cgroup_subsys_state *css;
/* ID 0 is unused ID */
if (!id)
return NULL;
css = css_lookup(&mem_cgroup_subsys, id);
if (!css)
return NULL;
return container_of(css, struct mem_cgroup, css);
}
static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{
struct mem_cgroup *mem;
struct page_cgroup *pc;
unsigned short id;
swp_entry_t ent;
VM_BUG_ON(!PageLocked(page));
@@ -1006,16 +1027,19 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
/*
* Used bit of swapcache is solid under page lock.
*/
if (PageCgroupUsed(pc))
if (PageCgroupUsed(pc)) {
mem = pc->mem_cgroup;
else {
if (mem && !css_tryget(&mem->css))
mem = NULL;
} else {
ent.val = page_private(page);
mem = lookup_swap_cgroup(ent);
id = lookup_swap_cgroup(ent);
rcu_read_lock();
mem = mem_cgroup_lookup(id);
if (mem && !css_tryget(&mem->css))
mem = NULL;
rcu_read_unlock();
}
if (!mem)
return NULL;
if (!css_tryget(&mem->css))
return NULL;
return mem;
}
@@ -1276,12 +1300,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
if (do_swap_account && !ret && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
unsigned short id;
/* avoid double counting */
mem = swap_cgroup_record(ent, NULL);
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
mem = mem_cgroup_lookup(id);
if (mem) {
/*
* We did swap-in. Then, this entry is doubly counted
* both in mem and memsw. We uncharge it, here.
* Recorded ID can be obsolete. We avoid calling
* css_tryget()
*/
res_counter_uncharge(&mem->memsw, PAGE_SIZE);
mem_cgroup_put(mem);
}
rcu_read_unlock();
}
return ret;
}
@@ -1346,13 +1380,21 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
*/
if (do_swap_account && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
unsigned short id;
struct mem_cgroup *memcg;
memcg = swap_cgroup_record(ent, NULL);
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
if (memcg) {
/*
* This recorded memcg can be obsolete one. So, avoid
* calling css_tryget
*/
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg);
}
rcu_read_unlock();
}
/* add this page(page_cgroup) to the LRU we want. */
@@ -1473,7 +1515,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
/* record memcg information */
if (do_swap_account && memcg) {
swap_cgroup_record(ent, memcg);
swap_cgroup_record(ent, css_id(&memcg->css));
mem_cgroup_get(memcg);
}
if (memcg)
@@ -1488,15 +1530,23 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
void mem_cgroup_uncharge_swap(swp_entry_t ent)
{
struct mem_cgroup *memcg;
unsigned short id;
if (!do_swap_account)
return;
memcg = swap_cgroup_record(ent, NULL);
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
if (memcg) {
/*
* We uncharge this because swap is freed.
* This memcg can be obsolete one. We avoid calling css_tryget
*/
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg);
}
rcu_read_unlock();
}
#endif

@@ -285,12 +285,8 @@ struct swap_cgroup_ctrl {
struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
/*
* This 8bytes seems big..maybe we can reduce this when we can use "id" for
* cgroup rather than pointer.
*/
struct swap_cgroup {
struct mem_cgroup *val;
unsigned short id;
};
#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup))
#define SC_POS_MASK (SC_PER_PAGE - 1)
@@ -342,10 +338,10 @@ not_enough_page:
* @ent: swap entry to be recorded into
* @mem: mem_cgroup to be recorded
*
* Returns old value at success, NULL at failure.
* (Of course, old value can be NULL.)
* Returns old value at success, 0 at failure.
* (Of course, old value can be 0.)
*/
struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
int type = swp_type(ent);
unsigned long offset = swp_offset(ent);
@@ -354,18 +350,18 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
struct swap_cgroup_ctrl *ctrl;
struct page *mappage;
struct swap_cgroup *sc;
struct mem_cgroup *old;
unsigned short old;
if (!do_swap_account)
return NULL;
return 0;
ctrl = &swap_cgroup_ctrl[type];
mappage = ctrl->map[idx];
sc = page_address(mappage);
sc += pos;
old = sc->val;
sc->val = mem;
old = sc->id;
sc->id = id;
return old;
}
@@ -374,9 +370,9 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
* lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
* @ent: swap entry to be looked up.
*
* Returns pointer to mem_cgroup at success. NULL at failure.
* Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
*/
struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
int type = swp_type(ent);
unsigned long offset = swp_offset(ent);
@@ -385,16 +381,16 @@ struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
struct swap_cgroup_ctrl *ctrl;
struct page *mappage;
struct swap_cgroup *sc;
struct mem_cgroup *ret;
unsigned short ret;
if (!do_swap_account)
return NULL;
return 0;
ctrl = &swap_cgroup_ctrl[type];
mappage = ctrl->map[idx];
sc = page_address(mappage);
sc += pos;
ret = sc->val;
ret = sc->id;
return ret;
}
@@ -432,7 +428,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)
printk(KERN_INFO
"swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
" and %ld bytes to hold mem_cgroup pointers on swap\n",
" and %ld bytes to hold mem_cgroup information per swap ents\n",
array_size, length * PAGE_SIZE);
printk(KERN_INFO
"swap_cgroup can be disabled by noswapaccount boot option.\n");