mm: let swap use exceptional entries
If swap entries are to be stored along with struct page pointers in a radix tree, they need to be distinguished as exceptional entries. Most of the handling of swap entries in radix tree will be contained in shmem.c, but a few functions in filemap.c's common code need to check for their appearance: find_get_page(), find_lock_page(), find_get_pages() and find_get_pages_contig(). So as not to slow their fast paths, tuck those checks inside the existing checks for unlikely radix_tree_deref_slot(); except for find_lock_page(), where it is an added test. And make it a BUG in find_get_pages_tag(), which is not applied to tmpfs files. A part of the reason for eliminating shmem_readpage() earlier, was to minimize the places where common code would need to allow for swap entries. The swp_entry_t known to swapfile.c must be massaged into a slightly different form when stored in the radix tree, just as it gets massaged into a pte_t when stored in page tables. In an i386 kernel this limits its information (type and page offset) to 30 bits: given 32 "types" of swapfile and 4kB pagesize, that's a maximum swapfile size of 128GB. Which is less than the 512GB we previously allowed with X86_PAE (where the swap entry can occupy the entire upper 32 bits of a pte_t), but not a new limitation on 32-bit without PAE; and there's not a new limitation on 64-bit (where swap filesize is already limited to 16TB by a 32-bit page offset). Thirty areas of 128GB is probably still enough swap for a 64GB 32-bit machine. Provide swp_to_radix_entry() and radix_to_swp_entry() conversions, and enforce filesize limit in read_swap_header(), just as for ptes. Signed-off-by: Hugh Dickins <hughd@google.com> Acked-by: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
6328650bb4
commit
a2c16d6cb0
3 changed files with 66 additions and 26 deletions
|
@ -1,3 +1,8 @@
|
|||
#ifndef _LINUX_SWAPOPS_H
|
||||
#define _LINUX_SWAPOPS_H
|
||||
|
||||
#include <linux/radix-tree.h>
|
||||
|
||||
/*
|
||||
* swapcache pages are stored in the swapper_space radix tree. We want to
|
||||
* get good packing density in that tree, so the index should be dense in
|
||||
|
@ -76,6 +81,22 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry)
|
|||
return __swp_entry_to_pte(arch_entry);
|
||||
}
|
||||
|
||||
static inline swp_entry_t radix_to_swp_entry(void *arg)
|
||||
{
|
||||
swp_entry_t entry;
|
||||
|
||||
entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
|
||||
return entry;
|
||||
}
|
||||
|
||||
/*
 * Encode a swp_entry_t as a pointer-sized value for a radix tree slot.
 *
 * entry.val is shifted left by RADIX_TREE_EXCEPTIONAL_SHIFT and then
 * OR-ed with RADIX_TREE_EXCEPTIONAL_ENTRY, so the result carries the
 * exceptional-entry marker.  Bits of entry.val shifted out at the top
 * are lost.
 */
static inline void *swp_to_radix_entry(swp_entry_t entry)
{
	unsigned long shifted = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;

	return (void *)(shifted | RADIX_TREE_EXCEPTIONAL_ENTRY);
}
|
||||
|
||||
#ifdef CONFIG_MIGRATION
|
||||
static inline swp_entry_t make_migration_entry(struct page *page, int write)
|
||||
{
|
||||
|
@ -169,3 +190,5 @@ static inline int non_swap_entry(swp_entry_t entry)
|
|||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_SWAPOPS_H */
|
||||
|
|
49
mm/filemap.c
49
mm/filemap.c
|
@ -714,9 +714,12 @@ repeat:
|
|||
page = radix_tree_deref_slot(pagep);
|
||||
if (unlikely(!page))
|
||||
goto out;
|
||||
if (radix_tree_deref_retry(page))
|
||||
if (radix_tree_exception(page)) {
|
||||
if (radix_tree_exceptional_entry(page))
|
||||
goto out;
|
||||
/* radix_tree_deref_retry(page) */
|
||||
goto repeat;
|
||||
|
||||
}
|
||||
if (!page_cache_get_speculative(page))
|
||||
goto repeat;
|
||||
|
||||
|
@ -753,7 +756,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
|
|||
|
||||
repeat:
|
||||
page = find_get_page(mapping, offset);
|
||||
if (page) {
|
||||
if (page && !radix_tree_exception(page)) {
|
||||
lock_page(page);
|
||||
/* Has the page been truncated? */
|
||||
if (unlikely(page->mapping != mapping)) {
|
||||
|
@ -849,11 +852,14 @@ repeat:
|
|||
if (unlikely(!page))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* This can only trigger when the entry at index 0 moves out
|
||||
* of or back to the root: none yet gotten, safe to restart.
|
||||
*/
|
||||
if (radix_tree_deref_retry(page)) {
|
||||
if (radix_tree_exception(page)) {
|
||||
if (radix_tree_exceptional_entry(page))
|
||||
continue;
|
||||
/*
|
||||
* radix_tree_deref_retry(page):
|
||||
* can only trigger when entry at index 0 moves out of
|
||||
* or back to root: none yet gotten, safe to restart.
|
||||
*/
|
||||
WARN_ON(start | i);
|
||||
goto restart;
|
||||
}
|
||||
|
@ -912,12 +918,16 @@ repeat:
|
|||
if (unlikely(!page))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* This can only trigger when the entry at index 0 moves out
|
||||
* of or back to the root: none yet gotten, safe to restart.
|
||||
*/
|
||||
if (radix_tree_deref_retry(page))
|
||||
if (radix_tree_exception(page)) {
|
||||
if (radix_tree_exceptional_entry(page))
|
||||
break;
|
||||
/*
|
||||
* radix_tree_deref_retry(page):
|
||||
* can only trigger when entry at index 0 moves out of
|
||||
* or back to root: none yet gotten, safe to restart.
|
||||
*/
|
||||
goto restart;
|
||||
}
|
||||
|
||||
if (!page_cache_get_speculative(page))
|
||||
goto repeat;
|
||||
|
@ -977,12 +987,15 @@ repeat:
|
|||
if (unlikely(!page))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* This can only trigger when the entry at index 0 moves out
|
||||
* of or back to the root: none yet gotten, safe to restart.
|
||||
*/
|
||||
if (radix_tree_deref_retry(page))
|
||||
if (radix_tree_exception(page)) {
|
||||
BUG_ON(radix_tree_exceptional_entry(page));
|
||||
/*
|
||||
* radix_tree_deref_retry(page):
|
||||
* can only trigger when entry at index 0 moves out of
|
||||
* or back to root: none yet gotten, safe to restart.
|
||||
*/
|
||||
goto restart;
|
||||
}
|
||||
|
||||
if (!page_cache_get_speculative(page))
|
||||
goto repeat;
|
||||
|
|
|
@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
|
|||
|
||||
/*
|
||||
* Find out how many pages are allowed for a single swap
|
||||
* device. There are two limiting factors: 1) the number of
|
||||
* bits for the swap offset in the swp_entry_t type and
|
||||
* 2) the number of bits in the a swap pte as defined by
|
||||
* the different architectures. In order to find the
|
||||
* largest possible bit mask a swap entry with swap type 0
|
||||
* device. There are three limiting factors: 1) the number
|
||||
* of bits for the swap offset in the swp_entry_t type, and
|
||||
* 2) the number of bits in the swap pte as defined by the
|
||||
* different architectures, and 3) the number of free bits
|
||||
* in an exceptional radix_tree entry. In order to find the
|
||||
* largest possible bit mask, a swap entry with swap type 0
|
||||
* and swap offset ~0UL is created, encoded to a swap pte,
|
||||
* decoded to a swp_entry_t again and finally the swap
|
||||
* decoded to a swp_entry_t again, and finally the swap
|
||||
* offset is extracted. This will mask all the bits from
|
||||
* the initial ~0UL mask that can't be encoded in either
|
||||
* the swp_entry_t or the architecture definition of a
|
||||
* swap pte.
|
||||
* swap pte. Then the same is done for a radix_tree entry.
|
||||
*/
|
||||
maxpages = swp_offset(pte_to_swp_entry(
|
||||
swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
|
||||
swp_entry_to_pte(swp_entry(0, ~0UL))));
|
||||
maxpages = swp_offset(radix_to_swp_entry(
|
||||
swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
|
||||
|
||||
if (maxpages > swap_header->info.last_page) {
|
||||
maxpages = swap_header->info.last_page + 1;
|
||||
/* p->max is an unsigned int: don't overflow it */
|
||||
|
|
Loading…
Reference in a new issue