bb13ffeb9f
When compaction was implemented it was known that scanning could potentially be excessive. The ideal was that a counter be maintained for each pageblock, but maintaining this information would incur a severe penalty due to a shared writable cache line. It has reached the point where the scanning costs are a serious problem, particularly on long-lived systems where a large process starts and allocates a large number of THPs at the same time. Instead of using a shared counter, this patch adds another bit to the pageblock flags called PB_migrate_skip. If a pageblock is scanned by either the migrate or the free scanner and 0 pages were isolated, the pageblock is marked to be skipped in the future. When scanning, this bit is checked before any scanning takes place and the block is skipped if it is set. The main difficulty with a patch like this is "when to ignore the cached information?" If it's ignored too often, the scanning rates will still be excessive. If the information is too stale then allocations will fail that might have otherwise succeeded. In this patch:
 o CMA always ignores the information
 o If the migrate and free scanners meet then the cached information will be discarded if it's at least 5 seconds since the last time the cache was discarded
 o If there are a large number of allocation failures, discard the cache.
The time-based heuristic is very clumsy but there are few choices for a better event. Depending solely on multiple allocation failures still allows excessive scanning when THP allocations are failing in quick succession due to memory pressure. Waiting until memory pressure is relieved would cause compaction to continually fail instead of using reclaim/compaction to try to allocate the page. The time-based mechanism is clumsy but a better option is not obvious.
Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: Rik van Riel <riel@redhat.com> Cc: Richard Davies <richard@arachsys.com> Cc: Shaohua Li <shli@kernel.org> Cc: Avi Kivity <avi@redhat.com> Acked-by: Rafael Aquini <aquini@redhat.com> Cc: Fengguang Wu <fengguang.wu@intel.com> Cc: Michal Nazarewicz <mina86@mina86.com> Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com> Cc: Kyungmin Park <kyungmin.park@samsung.com> Cc: Mark Brown <broonie@opensource.wolfsonmicro.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
89 lines
2.8 KiB
C
89 lines
2.8 KiB
C
/*
|
|
* Macros for manipulating and testing flags related to a
|
|
* pageblock_nr_pages number of pages.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation version 2 of the License
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
*
|
|
* Copyright (C) IBM Corporation, 2006
|
|
*
|
|
* Original author, Mel Gorman
|
|
* Major cleanups and reduction of bit operations, Andy Whitcroft
|
|
*/
|
|
#ifndef PAGEBLOCK_FLAGS_H
|
|
#define PAGEBLOCK_FLAGS_H
|
|
|
|
#include <linux/types.h>
|
|
|
|
/*
 * Indices of the flag bits kept for each pageblock. A flag applies to the
 * whole block of pages, not to an individual page.
 */
enum pageblock_bits {
	/* Migrate type field: 3 bits, from PB_migrate up to PB_migrate_end. */
	PB_migrate,
	PB_migrate_end = PB_migrate + (3 - 1),
#ifdef CONFIG_COMPACTION
	/* When set, compaction skips this block. */
	PB_migrate_skip,
#endif /* CONFIG_COMPACTION */
	/* Number of bit indices defined above; must stay the last entry. */
	NR_PAGEBLOCK_BITS
};
|
|
|
|
/*
 * pageblock_order selects the size of a pageblock:
 *   - huge pages with a variable size: an int variable defined elsewhere
 *   - huge pages with a constant size: HUGETLB_PAGE_ORDER
 *   - no huge pages: group by MAX_ORDER_NR_PAGES, i.e. MAX_ORDER-1
 */
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)

extern int pageblock_order;

#elif defined(CONFIG_HUGETLB_PAGE)

#define pageblock_order		HUGETLB_PAGE_ORDER

#else /* !CONFIG_HUGETLB_PAGE */

#define pageblock_order		(MAX_ORDER-1)

#endif

/* Number of pages in one pageblock: 2^pageblock_order. */
#define pageblock_nr_pages	(1UL << pageblock_order)
|
|
|
|
/* Only pointers to struct page appear below, so a forward declaration is enough. */
struct page;

/*
 * Getter and setter for a group of pageblock flag bits. Both functions are
 * implemented in mm/page_alloc.c.
 *
 * start_bitidx and end_bitidx select the bits to access; the macros in this
 * header pass enum pageblock_bits values. The range looks inclusive at both
 * ends: get_pageblock_flags() covers the three migrate-type bits with
 * 0..PB_migrate_end (confirm against mm/page_alloc.c).
 */
unsigned long get_pageblock_flags_group(struct page *page, int start_bitidx,
					int end_bitidx);
void set_pageblock_flags_group(struct page *page, unsigned long flags,
			       int start_bitidx, int end_bitidx);
|
|
|
|
#ifdef CONFIG_COMPACTION
/*
 * Accessors for the single PB_migrate_skip bit of a pageblock.
 *
 * The start and end bit indices taken by {get,set}_pageblock_flags_group()
 * are both inclusive: get_pageblock_flags() covers the three migrate-type
 * bits with the range 0..PB_migrate_end. A one-bit field therefore passes
 * the same index as start and end. Using PB_migrate_skip + 1 as the end
 * index would make the range two bits wide and also read or rewrite index
 * NR_PAGEBLOCK_BITS, which lies outside the bits of this pageblock.
 */
#define get_pageblock_skip(page) \
	get_pageblock_flags_group(page, PB_migrate_skip, PB_migrate_skip)
#define clear_pageblock_skip(page) \
	set_pageblock_flags_group(page, 0, PB_migrate_skip, PB_migrate_skip)
#define set_pageblock_skip(page) \
	set_pageblock_flags_group(page, 1, PB_migrate_skip, PB_migrate_skip)
#endif /* CONFIG_COMPACTION */
|
|
|
|
/*
 * Read or write the migrate-type bits for the given page, i.e. the range
 * PB_migrate..PB_migrate_end. PB_migrate is the first enumerator of
 * enum pageblock_bits, so it is index 0.
 */
#define get_pageblock_flags(page) \
	get_pageblock_flags_group(page, PB_migrate, PB_migrate_end)
#define set_pageblock_flags(page, flags) \
	set_pageblock_flags_group(page, flags, PB_migrate, PB_migrate_end)
|
|
|
|
#endif /* PAGEBLOCK_FLAGS_H */
|