3d59eebc5e
-----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.18 (GNU/Linux) iQIcBAABAgAGBQJQx0kQAAoJEHzG/DNEskfi4fQP/R5PRovayroZALBMLnVJDaLD Ttr9p40VNXbiJ+MfRgatJjSSJZ4Jl+fC3NEqBhcwVZhckZZb9R2s0WtrSQo5+ZbB vdRfiuKoCaKM4cSZ08C12uTvsF6xjhjd27CTUlMkyOcDoKxMEFKelv0hocSxe4Wo xqlv3eF+VsY7kE1BNbgBP06SX4tDpIHRxXfqJPMHaSKQmre+cU0xG2GcEu3QGbHT DEDTI788YSaWLmBfMC+kWoaQl1+bV/FYvavIAS8/o4K9IKvgR42VzrXmaFaqrbgb 72ksa6xfAi57yTmZHqyGmts06qYeBbPpKI+yIhCMInxA9CY3lPbvHppRf0RQOyzj YOi4hovGEMJKE+BCILukhJcZ9jCTtS3zut6v1rdvR88f4y7uhR9RfmRfsxuW7PNj 3Rmh191+n0lVWDmhOs2psXuCLJr3LEiA0dFffN1z8REUTtTAZMsj8Rz+SvBNAZDR hsJhERVeXB6X5uQ5rkLDzbn1Zic60LjVw7LIp6SF2OYf/YKaF8vhyWOA8dyCEu8W CGo7AoG0BO8tIIr8+LvFe8CweypysZImx4AjCfIs4u9pu/v11zmBvO9NO5yfuObF BreEERYgTes/UITxn1qdIW4/q+Nr0iKO3CTqsmu6L1GfCz3/XzPGs3U26fUhllqi Ka0JKgnWvsa6ez6FSzKI =ivQa -----END PGP SIGNATURE----- Merge tag 'balancenuma-v11' of git://git.kernel.org/pub/scm/linux/kernel/git/mel/linux-balancenuma Pull Automatic NUMA Balancing bare-bones from Mel Gorman: "There are three implementations for NUMA balancing, this tree (balancenuma), numacore which has been developed in tip/master and autonuma which is in aa.git. In almost all respects balancenuma is the dumbest of the three because its main impact is on the VM side with no attempt to be smart about scheduling. In the interest of getting the ball rolling, it would be desirable to see this much merged for 3.8 with the view to building scheduler smarts on top and adapting the VM where required for 3.9. The most recent set of comparisons available from different people are mel: https://lkml.org/lkml/2012/12/9/108 mingo: https://lkml.org/lkml/2012/12/7/331 tglx: https://lkml.org/lkml/2012/12/10/437 srikar: https://lkml.org/lkml/2012/12/10/397 The results are a mixed bag. In my own tests, balancenuma does reasonably well. It's dumb as rocks and does not regress against mainline. 
On the other hand, Ingo's tests show that balancenuma is incapable of converging for these workloads driven by perf which is bad but is potentially explained by the lack of scheduler smarts. Thomas' results show balancenuma improves on mainline but falls far short of numacore or autonuma. Srikar's results indicate we all suffer on a large machine with imbalanced node sizes. My own testing showed that recent numacore results have improved dramatically, particularly in the last week but not universally. We've butted heads heavily on system CPU usage and high levels of migration even when it shows that overall performance is better. There are also cases where it regresses. Of interest is that for specjbb in some configurations it will regress for lower numbers of warehouses and show gains for higher numbers which is not reported by the tool by default and sometimes missed in reports. Recently I reported for numacore that the JVM was crashing with NullPointerExceptions but currently it's unclear what the source of this problem is. Initially I thought it was in how numacore batch handles PTEs but I no longer think this is the case. It's possible numacore is just able to trigger it due to higher rates of migration. These reports were quite late in the cycle so I/we would like to start with this tree as it contains much of the code we can agree on and has not changed significantly over the last 2-3 weeks." * tag 'balancenuma-v11' of git://git.kernel.org/pub/scm/linux/kernel/git/mel/linux-balancenuma: (50 commits) mm/rmap, migration: Make rmap_walk_anon() and try_to_unmap_anon() more scalable mm/rmap: Convert the struct anon_vma::mutex to an rwsem mm: migrate: Account a transhuge page properly when rate limiting mm: numa: Account for failed allocations and isolations as migration failures mm: numa: Add THP migration for the NUMA working set scanning fault case build fix mm: numa: Add THP migration for the NUMA working set scanning fault case. 
mm: sched: numa: Delay PTE scanning until a task is scheduled on a new node mm: sched: numa: Control enabling and disabling of NUMA balancing if !SCHED_DEBUG mm: sched: numa: Control enabling and disabling of NUMA balancing mm: sched: Adapt the scanning rate if a NUMA hinting fault does not migrate mm: numa: Use a two-stage filter to restrict pages being migrated for unlikely task<->node relationships mm: numa: migrate: Set last_nid on newly allocated page mm: numa: split_huge_page: Transfer last_nid on tail page mm: numa: Introduce last_nid to the page frame sched: numa: Slowly increase the scanning period as NUMA faults are handled mm: numa: Rate limit setting of pte_numa if node is saturated mm: numa: Rate limit the amount of memory that is migrated between nodes mm: numa: Structures for Migrate On Fault per NUMA migration rate limiting mm: numa: Migrate pages handled during a pmd_numa hinting fault mm: numa: Migrate on reference policy ...
128 lines
4.1 KiB
C
128 lines
4.1 KiB
C
#ifndef _LINUX_MIGRATE_H
|
|
#define _LINUX_MIGRATE_H
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/mempolicy.h>
|
|
#include <linux/migrate_mode.h>
|
|
|
|
/*
 * new_page_t - function type of the callback passed as 'x' to migrate_pages()
 * and migrate_huge_page().  It receives a struct page pointer, an unsigned
 * long 'private' value and an int **, and returns a struct page pointer.
 * NOTE(review): by its name this presumably supplies the destination page,
 * with 'private' forwarded from the migrate_*() caller -- confirm against
 * the implementation.
 */
typedef struct page *new_page_t(struct page *, unsigned long private, int **);
|
|
|
|
/*
 * Return values from address_space_operations.migratepage():
 * - negative errno on page migration failure;
 * - zero on page migration success;
 *
 * The balloon page migration introduces this special case where a 'distinct'
 * return code is used to flag a successful page migration to unmap_and_move().
 * This approach is necessary because page migration can race against balloon
 * deflation procedure, and for such case we could introduce a nasty page leak
 * if a successfully migrated balloon page gets released concurrently with
 * migration's unmap_and_move() wrap-up steps.
 */
#define MIGRATEPAGE_SUCCESS		0
#define MIGRATEPAGE_BALLOON_SUCCESS	1 /* special ret code for balloon page
					   * successful migration case.
					   */
|
|
/*
 * Reason codes for a page migration request.  Enumerators take the default
 * consecutive values starting at 0, so their order must not be changed
 * casually.
 * NOTE(review): migrate_pages() takes an 'int reason' argument that is
 * presumably one of these values -- confirm against its callers.
 */
enum migrate_reason {
	MR_COMPACTION,
	MR_MEMORY_FAILURE,
	MR_MEMORY_HOTPLUG,
	MR_SYSCALL,		/* also applies to cpusets */
	MR_MEMPOLICY_MBIND,
	MR_NUMA_MISPLACED,
	MR_CMA
};
|
|
|
|
#ifdef CONFIG_MIGRATION
|
|
|
|
extern void putback_lru_pages(struct list_head *l);
|
|
extern void putback_movable_pages(struct list_head *l);
|
|
extern int migrate_page(struct address_space *,
|
|
struct page *, struct page *, enum migrate_mode);
|
|
extern int migrate_pages(struct list_head *l, new_page_t x,
|
|
unsigned long private, bool offlining,
|
|
enum migrate_mode mode, int reason);
|
|
extern int migrate_huge_page(struct page *, new_page_t x,
|
|
unsigned long private, bool offlining,
|
|
enum migrate_mode mode);
|
|
|
|
extern int fail_migrate_page(struct address_space *,
|
|
struct page *, struct page *);
|
|
|
|
extern int migrate_prep(void);
|
|
extern int migrate_prep_local(void);
|
|
extern int migrate_vmas(struct mm_struct *mm,
|
|
const nodemask_t *from, const nodemask_t *to,
|
|
unsigned long flags);
|
|
extern void migrate_page_copy(struct page *newpage, struct page *page);
|
|
extern int migrate_huge_page_move_mapping(struct address_space *mapping,
|
|
struct page *newpage, struct page *page);
|
|
#else
|
|
|
|
/* No-op stand-in used when CONFIG_MIGRATION is not set; 'l' is ignored. */
static inline void putback_lru_pages(struct list_head *l)
{
}
|
|
/* No-op stand-in used when CONFIG_MIGRATION is not set; 'l' is ignored. */
static inline void putback_movable_pages(struct list_head *l)
{
}
|
|
/*
 * Stub used when CONFIG_MIGRATION is not set: all arguments are ignored
 * and -ENOSYS is always returned.
 */
static inline int migrate_pages(struct list_head *l, new_page_t x,
		unsigned long private, bool offlining,
		enum migrate_mode mode, int reason)
{
	return -ENOSYS;
}
|
|
/*
 * Stub used when CONFIG_MIGRATION is not set: all arguments are ignored
 * and -ENOSYS is always returned.
 */
static inline int migrate_huge_page(struct page *page, new_page_t x,
		unsigned long private, bool offlining,
		enum migrate_mode mode)
{
	return -ENOSYS;
}
|
|
|
|
/* Stub used when CONFIG_MIGRATION is not set: always returns -ENOSYS. */
static inline int migrate_prep(void)
{
	return -ENOSYS;
}
|
|
/* Stub used when CONFIG_MIGRATION is not set: always returns -ENOSYS. */
static inline int migrate_prep_local(void)
{
	return -ENOSYS;
}
|
|
|
|
/*
 * Stub used when CONFIG_MIGRATION is not set: the mm, both node masks and
 * the flags are ignored and -ENOSYS is always returned.
 */
static inline int migrate_vmas(struct mm_struct *mm,
		const nodemask_t *from, const nodemask_t *to,
		unsigned long flags)
{
	return -ENOSYS;
}
|
|
|
|
/* No-op stand-in used when CONFIG_MIGRATION is not set; both pages are ignored. */
static inline void migrate_page_copy(struct page *newpage,
				     struct page *page)
{
}
|
|
|
|
/*
 * Stub used when CONFIG_MIGRATION is not set: the mapping and both pages
 * are ignored and -ENOSYS is always returned.
 */
static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
				  struct page *newpage, struct page *page)
{
	return -ENOSYS;
}
|
|
|
|
/*
 * Possible settings for the migratepage() method in address_space_operations.
 * Without CONFIG_MIGRATION both names expand to NULL, so an initializer that
 * uses them still compiles and leaves the method unset.
 */
#define migrate_page NULL
#define fail_migrate_page NULL
|
|
|
|
#endif /* CONFIG_MIGRATION */
|
|
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
/* NUMA balancing hooks, declared only when CONFIG_NUMA_BALANCING is set. */
extern int migrate_misplaced_page(struct page *page, int node);
extern bool migrate_ratelimited(int node);
|
|
#else
|
|
/*
 * Stub used when CONFIG_NUMA_BALANCING is not set: 'page' and 'node' are
 * ignored and -EAGAIN is always returned.
 */
static inline int migrate_misplaced_page(struct page *page, int node)
{
	return -EAGAIN; /* can't migrate now */
}
|
|
/*
 * Stub used when CONFIG_NUMA_BALANCING is not set: 'node' is ignored and
 * false is always returned.
 */
static inline bool migrate_ratelimited(int node)
{
	return false;
}
|
|
#endif /* CONFIG_NUMA_BALANCING */
|
|
|
|
#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
|
|
/*
 * Declared only when both CONFIG_NUMA_BALANCING and
 * CONFIG_TRANSPARENT_HUGEPAGE are set; otherwise the inline stub in the
 * #else branch is used.
 */
extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
			struct vm_area_struct *vma,
			pmd_t *pmd, pmd_t entry,
			unsigned long address,
			struct page *page, int node);
|
|
#else
|
|
/*
 * Stub used unless both CONFIG_NUMA_BALANCING and
 * CONFIG_TRANSPARENT_HUGEPAGE are set: every argument is ignored and
 * -EAGAIN is always returned.
 */
static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
			struct vm_area_struct *vma,
			pmd_t *pmd, pmd_t entry,
			unsigned long address,
			struct page *page, int node)
{
	return -EAGAIN;
}
|
|
#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/
|
|
|
|
#endif /* _LINUX_MIGRATE_H */
|