mm, devm_memremap_pages: fix shutdown handling
The last step before devm_memremap_pages() returns success is to allocate
a release action, devm_memremap_pages_release(), to tear the entire setup
down. However, the result from devm_add_action() is not checked.
Checking the error from devm_add_action() is not enough. The API
currently relies on the fact that the percpu_ref it is using is killed by
the time the devm_memremap_pages_release() is run. Rather than continue
this awkward situation, offload the responsibility of killing the
percpu_ref to devm_memremap_pages_release() directly. This allows
devm_memremap_pages() to do the right thing relative to init failures and
shutdown.
Without this change we could fail to register the teardown of
devm_memremap_pages(). The likelihood of hitting this failure is tiny as
small memory allocations almost always succeed. However, the impact of
the failure is large given any future reconfiguration, or disable/enable,
of an nvdimm namespace will fail forever as subsequent calls to
devm_memremap_pages() will fail to set up the pgmap_radix since there will
be stale entries for the physical address range.
An argument could be made to require that the ->kill() operation be set in
the @pgmap arg rather than passed in separately. However, it helps code
readability, tracking the lifetime of a given instance, to be able to grep
the kill routine directly at the devm_memremap_pages() call site.
Link: http://lkml.kernel.org/r/154275558526.76910.7535251937849268605.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Fixes: e8d5134833 ("memremap: change devm_memremap_pages interface...")
Reviewed-by: "Jérôme Glisse" <jglisse@redhat.com>
Reported-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
06489cfbd9
commit
a95c90f1e2
5 changed files with 38 additions and 36 deletions
|
@ -48,9 +48,8 @@ static void dax_pmem_percpu_exit(void *data)
|
|||
percpu_ref_exit(ref);
|
||||
}
|
||||
|
||||
static void dax_pmem_percpu_kill(void *data)
|
||||
static void dax_pmem_percpu_kill(struct percpu_ref *ref)
|
||||
{
|
||||
struct percpu_ref *ref = data;
|
||||
struct dax_pmem *dax_pmem = to_dax_pmem(ref);
|
||||
|
||||
dev_dbg(dax_pmem->dev, "trace\n");
|
||||
|
@ -112,17 +111,10 @@ static int dax_pmem_probe(struct device *dev)
|
|||
}
|
||||
|
||||
dax_pmem->pgmap.ref = &dax_pmem->ref;
|
||||
dax_pmem->pgmap.kill = dax_pmem_percpu_kill;
|
||||
addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
|
||||
if (IS_ERR(addr)) {
|
||||
devm_remove_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
|
||||
percpu_ref_exit(&dax_pmem->ref);
|
||||
if (IS_ERR(addr))
|
||||
return PTR_ERR(addr);
|
||||
}
|
||||
|
||||
rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
|
||||
&dax_pmem->ref);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* adjust the dax_region resource to the start of data */
|
||||
memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
|
||||
|
|
|
@ -309,8 +309,11 @@ static void pmem_release_queue(void *q)
|
|||
blk_cleanup_queue(q);
|
||||
}
|
||||
|
||||
static void pmem_freeze_queue(void *q)
|
||||
static void pmem_freeze_queue(struct percpu_ref *ref)
|
||||
{
|
||||
struct request_queue *q;
|
||||
|
||||
q = container_of(ref, typeof(*q), q_usage_counter);
|
||||
blk_freeze_queue_start(q);
|
||||
}
|
||||
|
||||
|
@ -402,6 +405,7 @@ static int pmem_attach_disk(struct device *dev,
|
|||
|
||||
pmem->pfn_flags = PFN_DEV;
|
||||
pmem->pgmap.ref = &q->q_usage_counter;
|
||||
pmem->pgmap.kill = pmem_freeze_queue;
|
||||
if (is_nd_pfn(dev)) {
|
||||
if (setup_pagemap_fsdax(dev, &pmem->pgmap))
|
||||
return -ENOMEM;
|
||||
|
@ -427,13 +431,6 @@ static int pmem_attach_disk(struct device *dev,
|
|||
memcpy(&bb_res, &nsio->res, sizeof(bb_res));
|
||||
}
|
||||
|
||||
/*
|
||||
* At release time the queue must be frozen before
|
||||
* devm_memremap_pages is unwound
|
||||
*/
|
||||
if (devm_add_action_or_reset(dev, pmem_freeze_queue, q))
|
||||
return -ENOMEM;
|
||||
|
||||
if (IS_ERR(addr))
|
||||
return PTR_ERR(addr);
|
||||
pmem->virt_addr = addr;
|
||||
|
|
|
@ -111,6 +111,7 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
|
|||
* @altmap: pre-allocated/reserved memory for vmemmap allocations
|
||||
* @res: physical address range covered by @ref
|
||||
* @ref: reference count that pins the devm_memremap_pages() mapping
|
||||
* @kill: callback to transition @ref to the dead state
|
||||
* @dev: host device of the mapping for debug
|
||||
* @data: private data pointer for page_free()
|
||||
* @type: memory type: see MEMORY_* in memory_hotplug.h
|
||||
|
@ -122,6 +123,7 @@ struct dev_pagemap {
|
|||
bool altmap_valid;
|
||||
struct resource res;
|
||||
struct percpu_ref *ref;
|
||||
void (*kill)(struct percpu_ref *ref);
|
||||
struct device *dev;
|
||||
void *data;
|
||||
enum memory_type type;
|
||||
|
|
|
@ -88,14 +88,10 @@ static void devm_memremap_pages_release(void *data)
|
|||
resource_size_t align_start, align_size;
|
||||
unsigned long pfn;
|
||||
|
||||
pgmap->kill(pgmap->ref);
|
||||
for_each_device_pfn(pfn, pgmap)
|
||||
put_page(pfn_to_page(pfn));
|
||||
|
||||
if (percpu_ref_tryget_live(pgmap->ref)) {
|
||||
dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
|
||||
percpu_ref_put(pgmap->ref);
|
||||
}
|
||||
|
||||
/* pages are dead and unused, undo the arch mapping */
|
||||
align_start = res->start & ~(SECTION_SIZE - 1);
|
||||
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
|
||||
|
@ -116,7 +112,7 @@ static void devm_memremap_pages_release(void *data)
|
|||
/**
|
||||
* devm_memremap_pages - remap and provide memmap backing for the given resource
|
||||
* @dev: hosting device for @res
|
||||
* @pgmap: pointer to a struct dev_pgmap
|
||||
* @pgmap: pointer to a struct dev_pagemap
|
||||
*
|
||||
* Notes:
|
||||
* 1/ At a minimum the res, ref and type members of @pgmap must be initialized
|
||||
|
@ -125,11 +121,8 @@ static void devm_memremap_pages_release(void *data)
|
|||
* 2/ The altmap field may optionally be initialized, in which case altmap_valid
|
||||
* must be set to true
|
||||
*
|
||||
* 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
|
||||
* time (or devm release event). The expected order of events is that ref has
|
||||
* been through percpu_ref_kill() before devm_memremap_pages_release(). The
|
||||
* wait for the completion of all references being dropped and
|
||||
* percpu_ref_exit() must occur after devm_memremap_pages_release().
|
||||
* 3/ pgmap->ref must be 'live' on entry and will be killed at
|
||||
* devm_memremap_pages_release() time, or if this routine fails.
|
||||
*
|
||||
* 4/ res is expected to be a host memory range that could feasibly be
|
||||
* treated as a "System RAM" range, i.e. not a device mmio range, but
|
||||
|
@ -145,6 +138,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
|
|||
pgprot_t pgprot = PAGE_KERNEL;
|
||||
int error, nid, is_ram;
|
||||
|
||||
if (!pgmap->ref || !pgmap->kill)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
align_start = res->start & ~(SECTION_SIZE - 1);
|
||||
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
|
||||
- align_start;
|
||||
|
@ -170,12 +166,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
|
|||
if (is_ram != REGION_DISJOINT) {
|
||||
WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
|
||||
is_ram == REGION_MIXED ? "mixed" : "ram", res);
|
||||
return ERR_PTR(-ENXIO);
|
||||
error = -ENXIO;
|
||||
goto err_array;
|
||||
}
|
||||
|
||||
if (!pgmap->ref)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
pgmap->dev = dev;
|
||||
|
||||
error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
|
||||
|
@ -217,7 +211,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
|
|||
align_size >> PAGE_SHIFT, pgmap);
|
||||
percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
|
||||
|
||||
devm_add_action(dev, devm_memremap_pages_release, pgmap);
|
||||
error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
|
||||
pgmap);
|
||||
if (error)
|
||||
return ERR_PTR(error);
|
||||
|
||||
return __va(res->start);
|
||||
|
||||
|
@ -228,6 +225,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
|
|||
err_pfn_remap:
|
||||
pgmap_array_delete(res);
|
||||
err_array:
|
||||
pgmap->kill(pgmap->ref);
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(devm_memremap_pages);
|
||||
|
|
|
@ -104,13 +104,26 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
|
|||
}
|
||||
EXPORT_SYMBOL(__wrap_devm_memremap);
|
||||
|
||||
static void nfit_test_kill(void *_pgmap)
|
||||
{
|
||||
struct dev_pagemap *pgmap = _pgmap;
|
||||
|
||||
pgmap->kill(pgmap->ref);
|
||||
}
|
||||
|
||||
void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
|
||||
{
|
||||
resource_size_t offset = pgmap->res.start;
|
||||
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
|
||||
|
||||
if (nfit_res)
|
||||
if (nfit_res) {
|
||||
int rc;
|
||||
|
||||
rc = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
|
||||
if (rc)
|
||||
return ERR_PTR(rc);
|
||||
return nfit_res->buf + offset - nfit_res->res.start;
|
||||
}
|
||||
return devm_memremap_pages(dev, pgmap);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
|
||||
|
|
Loading…
Reference in a new issue