mm: memcontrol: take a css reference for each charged page
Charges currently pin the css indirectly by playing tricks during
css_offline(): user pages stall the offlining process until all of them
have been reparented, whereas kmemcg acquires a keep-alive reference if
outstanding kernel pages are detected at that point.

In preparation for removing all this complexity, make the pinning
explicit and acquire a css reference for every charged page.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit e8ea14cc6e
parent 5ac8fb31ad

3 changed files with 92 additions and 24 deletions
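Before the diff itself, the scheme can be illustrated outside the kernel:
every charged page takes one reference on its group, every uncharge drops
one, and offlining merely drops the base reference, so the group stays
alive exactly as long as pages are outstanding. A minimal user-space
sketch of that invariant, where group, charge() and uncharge() are
illustrative names, not kernel API (the real css side uses a percpu_ref,
not a plain atomic):

/* Minimal sketch, C11 atomics standing in for the percpu_ref. */
#include <stdatomic.h>
#include <stdio.h>

struct group {
	atomic_long refs;			/* css->refcnt analogue */
	void (*release)(struct group *g);	/* called when refs hits 0 */
};

/* Batched get/put, mirroring the css_get_many()/css_put_many() pair
 * that this patch introduces. */
static void group_get_many(struct group *g, unsigned long n)
{
	atomic_fetch_add(&g->refs, (long)n);
}

static void group_put_many(struct group *g, unsigned long n)
{
	/* fetch_sub returns the old value; old == n means this call
	 * dropped the last reference */
	if (atomic_fetch_sub(&g->refs, (long)n) == (long)n)
		g->release(g);
}

/* Charging nr_pages pins the group with nr_pages references; uncharging
 * drops the same number, so outstanding pages alone keep it alive. */
static void charge(struct group *g, unsigned long nr_pages)
{
	group_get_many(g, nr_pages);
}

static void uncharge(struct group *g, unsigned long nr_pages)
{
	group_put_many(g, nr_pages);
}

static void last_ref_gone(struct group *g)
{
	(void)g;
	puts("all pages uncharged and group offline: safe to free");
}

int main(void)
{
	struct group g = { .release = last_ref_gone };

	atomic_init(&g.refs, 1);	/* base reference, as at css creation */
	charge(&g, 512);		/* e.g. one THP: 512 page references */
	uncharge(&g, 512);
	group_put_many(&g, 1);		/* offlining drops the base ref */
	return 0;
}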
include/linux/cgroup.h
@@ -112,6 +112,19 @@ static inline void css_get(struct cgroup_subsys_state *css)
 		percpu_ref_get(&css->refcnt);
 }
 
+/**
+ * css_get_many - obtain references on the specified css
+ * @css: target css
+ * @n: number of references to get
+ *
+ * The caller must already have a reference.
+ */
+static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_get_many(&css->refcnt, n);
+}
+
 /**
  * css_tryget - try to obtain a reference on the specified css
  * @css: target css
@@ -159,6 +172,19 @@ static inline void css_put(struct cgroup_subsys_state *css)
 		percpu_ref_put(&css->refcnt);
 }
 
+/**
+ * css_put_many - put css references
+ * @css: target css
+ * @n: number of references to put
+ *
+ * Put references obtained via css_get() and css_tryget_online().
+ */
+static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_put_many(&css->refcnt, n);
+}
+
 /* bits in struct cgroup flags field */
 enum {
 	/* Control Group requires release notifications to userspace */
include/linux/percpu-refcount.h
@@ -146,6 +146,29 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
 	return true;
 }
 
+/**
+ * percpu_ref_get_many - increment a percpu refcount
+ * @ref: percpu_ref to get
+ * @nr: number of references to get
+ *
+ * Analogous to atomic_long_add().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
+{
+	unsigned long __percpu *percpu_count;
+
+	rcu_read_lock_sched();
+
+	if (__ref_is_percpu(ref, &percpu_count))
+		this_cpu_add(*percpu_count, nr);
+	else
+		atomic_long_add(nr, &ref->count);
+
+	rcu_read_unlock_sched();
+}
+
 /**
  * percpu_ref_get - increment a percpu refcount
  * @ref: percpu_ref to get
@@ -156,16 +179,7 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
  */
 static inline void percpu_ref_get(struct percpu_ref *ref)
 {
-	unsigned long __percpu *percpu_count;
-
-	rcu_read_lock_sched();
-
-	if (__ref_is_percpu(ref, &percpu_count))
-		this_cpu_inc(*percpu_count);
-	else
-		atomic_long_inc(&ref->count);
-
-	rcu_read_unlock_sched();
+	percpu_ref_get_many(ref, 1);
 }
 
 /**
@@ -230,6 +244,30 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 	return ret;
 }
 
+/**
+ * percpu_ref_put_many - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ * @nr: number of references to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
+{
+	unsigned long __percpu *percpu_count;
+
+	rcu_read_lock_sched();
+
+	if (__ref_is_percpu(ref, &percpu_count))
+		this_cpu_sub(*percpu_count, nr);
+	else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
+		ref->release(ref);
+
+	rcu_read_unlock_sched();
+}
+
 /**
  * percpu_ref_put - decrement a percpu refcount
  * @ref: percpu_ref to put
@@ -241,16 +279,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
  */
 static inline void percpu_ref_put(struct percpu_ref *ref)
 {
-	unsigned long __percpu *percpu_count;
-
-	rcu_read_lock_sched();
-
-	if (__ref_is_percpu(ref, &percpu_count))
-		this_cpu_dec(*percpu_count);
-	else if (unlikely(atomic_long_dec_and_test(&ref->count)))
-		ref->release(ref);
-
-	rcu_read_unlock_sched();
+	percpu_ref_put_many(ref, 1);
 }
 
 /**
mm/memcontrol.c
@@ -2273,6 +2273,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 		page_counter_uncharge(&old->memory, stock->nr_pages);
 		if (do_swap_account)
 			page_counter_uncharge(&old->memsw, stock->nr_pages);
+		css_put_many(&old->css, stock->nr_pages);
 		stock->nr_pages = 0;
 	}
 	stock->cached = NULL;
@@ -2530,6 +2531,7 @@ bypass:
 	return -EINTR;
 
 done_restock:
+	css_get_many(&memcg->css, batch);
 	if (batch > nr_pages)
 		refill_stock(memcg, batch - nr_pages);
 done:
@@ -2544,6 +2546,8 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 	page_counter_uncharge(&memcg->memory, nr_pages);
 	if (do_swap_account)
 		page_counter_uncharge(&memcg->memsw, nr_pages);
+
+	css_put_many(&memcg->css, nr_pages);
 }
 
 /*
@@ -2739,6 +2743,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
 		page_counter_charge(&memcg->memory, nr_pages);
 		if (do_swap_account)
 			page_counter_charge(&memcg->memsw, nr_pages);
+		css_get_many(&memcg->css, nr_pages);
 		ret = 0;
 	} else if (ret)
 		page_counter_uncharge(&memcg->kmem, nr_pages);
@@ -2754,8 +2759,10 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
 		page_counter_uncharge(&memcg->memsw, nr_pages);
 
 	/* Not down to 0 */
-	if (page_counter_uncharge(&memcg->kmem, nr_pages))
+	if (page_counter_uncharge(&memcg->kmem, nr_pages)) {
+		css_put_many(&memcg->css, nr_pages);
 		return;
+	}
 
 	/*
 	 * Releases a reference taken in kmem_cgroup_css_offline in case
@@ -2767,6 +2774,8 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
 	 */
 	if (memcg_kmem_test_and_clear_dead(memcg))
 		css_put(&memcg->css);
+
+	css_put_many(&memcg->css, nr_pages);
 }
 
 /*
@@ -3394,10 +3403,13 @@ static int mem_cgroup_move_parent(struct page *page,
 	ret = mem_cgroup_move_account(page, nr_pages,
 				pc, child, parent);
 	if (!ret) {
+		if (!mem_cgroup_is_root(parent))
+			css_get_many(&parent->css, nr_pages);
 		/* Take charge off the local counters */
 		page_counter_cancel(&child->memory, nr_pages);
 		if (do_swap_account)
 			page_counter_cancel(&child->memsw, nr_pages);
+		css_put_many(&child->css, nr_pages);
 	}
 
 	if (nr_pages > 1)
@@ -5767,7 +5779,6 @@ static void __mem_cgroup_clear_mc(void)
 {
 	struct mem_cgroup *from = mc.from;
 	struct mem_cgroup *to = mc.to;
-	int i;
 
 	/* we must uncharge all the leftover precharges from mc.to */
 	if (mc.precharge) {
@@ -5795,8 +5806,7 @@ static void __mem_cgroup_clear_mc(void)
 		if (!mem_cgroup_is_root(mc.to))
 			page_counter_uncharge(&mc.to->memory, mc.moved_swap);
 
-		for (i = 0; i < mc.moved_swap; i++)
-			css_put(&mc.from->css);
+		css_put_many(&mc.from->css, mc.moved_swap);
 
 		/* we've already done css_get(mc.to) */
 		mc.moved_swap = 0;
@@ -6343,6 +6353,9 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 	__this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
 	memcg_check_events(memcg, dummy_page);
 	local_irq_restore(flags);
+
+	if (!mem_cgroup_is_root(memcg))
+		css_put_many(&memcg->css, max(nr_mem, nr_memsw));
 }
 
 static void uncharge_list(struct list_head *page_list)
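Taken together, the mm/memcontrol.c hunks keep the new reference ledger
balanced. A hedged walk-through for a single-page charge, assuming the
charge batch size of that era (CHARGE_BATCH, 32 pages): try_charge()
takes 32 css references at done_restock:, refill_stock() parks the 31
surplus pages together with their references in the per-cpu stock, and a
later drain_stock() puts those 31 references back, leaving exactly one
reference pinned by the charged page until uncharge_batch() or
cancel_charge() releases it.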