Merge branch 'tracing/mmiotrace' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'tracing/mmiotrace' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86 mmiotrace: fix race with release_kmmio_fault_page()
  x86 mmiotrace: improve handling of secondary faults
  x86 mmiotrace: split set_page_presence()
  x86 mmiotrace: fix save/restore page table state
  x86 mmiotrace: WARN_ONCE if dis/arming a page fails
  x86: add far read test to testmmiotrace
  x86: count errors in testmmiotrace.ko
This commit is contained in:
Linus Torvalds 2009-03-03 14:32:37 -08:00
commit f2a4165526
2 changed files with 153 additions and 66 deletions

View file

@ -32,11 +32,14 @@ struct kmmio_fault_page {
struct list_head list; struct list_head list;
struct kmmio_fault_page *release_next; struct kmmio_fault_page *release_next;
unsigned long page; /* location of the fault page */ unsigned long page; /* location of the fault page */
bool old_presence; /* page presence prior to arming */
bool armed;
/* /*
* Number of times this page has been registered as a part * Number of times this page has been registered as a part
* of a probe. If zero, page is disarmed and this may be freed. * of a probe. If zero, page is disarmed and this may be freed.
* Used only by writers (RCU). * Used only by writers (RCU) and post_kmmio_handler().
* Protected by kmmio_lock, when linked into kmmio_page_table.
*/ */
int count; int count;
}; };
@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
return NULL; return NULL;
} }
static void set_page_present(unsigned long addr, bool present, static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
unsigned int *pglevel) {
pmdval_t v = pmd_val(*pmd);
*old = !!(v & _PAGE_PRESENT);
v &= ~_PAGE_PRESENT;
if (present)
v |= _PAGE_PRESENT;
set_pmd(pmd, __pmd(v));
}
/*
 * Set or clear the _PAGE_PRESENT bit of a 4K page table entry.
 *
 * @pte:     entry to modify
 * @present: desired state of the present bit
 * @old:     receives the state of the present bit before the change
 *
 * The new value is written with set_pte_atomic().
 */
static void set_pte_presence(pte_t *pte, bool present, bool *old)
{
	pteval_t cur = pte_val(*pte);
	pteval_t next;

	/* Report the previous presence before the entry is touched. */
	*old = (cur & _PAGE_PRESENT) != 0;

	if (present)
		next = cur | _PAGE_PRESENT;
	else
		next = cur & ~_PAGE_PRESENT;

	set_pte_atomic(pte, __pte(next));
}
static int set_page_presence(unsigned long addr, bool present, bool *old)
{ {
pteval_t pteval;
pmdval_t pmdval;
unsigned int level; unsigned int level;
pmd_t *pmd;
pte_t *pte = lookup_address(addr, &level); pte_t *pte = lookup_address(addr, &level);
if (!pte) { if (!pte) {
pr_err("kmmio: no pte for page 0x%08lx\n", addr); pr_err("kmmio: no pte for page 0x%08lx\n", addr);
return; return -1;
} }
if (pglevel)
*pglevel = level;
switch (level) { switch (level) {
case PG_LEVEL_2M: case PG_LEVEL_2M:
pmd = (pmd_t *)pte; set_pmd_presence((pmd_t *)pte, present, old);
pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
if (present)
pmdval |= _PAGE_PRESENT;
set_pmd(pmd, __pmd(pmdval));
break; break;
case PG_LEVEL_4K: case PG_LEVEL_4K:
pteval = pte_val(*pte) & ~_PAGE_PRESENT; set_pte_presence(pte, present, old);
if (present)
pteval |= _PAGE_PRESENT;
set_pte_atomic(pte, __pte(pteval));
break; break;
default: default:
pr_err("kmmio: unexpected page level 0x%x.\n", level); pr_err("kmmio: unexpected page level 0x%x.\n", level);
return; return -1;
} }
__flush_tlb_one(addr); __flush_tlb_one(addr);
return 0;
} }
/** Mark the given page as not present. Access to it will trigger a fault. */ /*
static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) * Mark the given page as not present. Access to it will trigger a fault.
*
* Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
* protection is ignored here. RCU read lock is assumed held, so the struct
* will not disappear unexpectedly. Furthermore, the caller must guarantee,
* that double arming the same virtual address (page) cannot occur.
*
* Double disarming on the other hand is allowed, and may occur when a fault
* and mmiotrace shutdown happen simultaneously.
*/
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{ {
set_page_present(page & PAGE_MASK, false, pglevel); int ret;
WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
if (f->armed) {
pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
f->page, f->count, f->old_presence);
}
ret = set_page_presence(f->page, false, &f->old_presence);
WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
f->armed = true;
return ret;
} }
/** Mark the given page as present. */ /** Restore the given page to saved presence state. */
static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{ {
set_page_present(page & PAGE_MASK, true, pglevel); bool tmp;
int ret = set_page_presence(f->page, f->old_presence, &tmp);
WARN_ONCE(ret < 0,
KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
f->armed = false;
} }
/* /*
@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx = &get_cpu_var(kmmio_ctx); ctx = &get_cpu_var(kmmio_ctx);
if (ctx->active) { if (ctx->active) {
disarm_kmmio_fault_page(faultpage->page, NULL);
if (addr == ctx->addr) { if (addr == ctx->addr) {
/* /*
* On SMP we sometimes get recursive probe hits on the * A second fault on the same page means some other
* same address. Context is already saved, fall out. * condition needs handling by do_page_fault(), the
* page really not being present is the most common.
*/ */
pr_debug("kmmio: duplicate probe hit on CPU %d, for " pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
"address 0x%08lx.\n", addr, smp_processor_id());
smp_processor_id(), addr);
ret = 1; if (!faultpage->old_presence)
goto no_kmmio_ctx; pr_info("kmmio: unexpected secondary hit for "
} "address 0x%08lx on CPU %d.\n", addr,
/* smp_processor_id());
* Prevent overwriting already in-flight context. } else {
* This should not happen, let's hope disarming at least /*
* prevents a panic. * Prevent overwriting already in-flight context.
*/ * This should not happen, let's hope disarming at
pr_emerg("kmmio: recursive probe hit on CPU %d, " * least prevents a panic.
*/
pr_emerg("kmmio: recursive probe hit on CPU %d, "
"for address 0x%08lx. Ignoring.\n", "for address 0x%08lx. Ignoring.\n",
smp_processor_id(), addr); smp_processor_id(), addr);
pr_emerg("kmmio: previous hit was at 0x%08lx.\n", pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
ctx->addr); ctx->addr);
disarm_kmmio_fault_page(faultpage);
}
goto no_kmmio_ctx; goto no_kmmio_ctx;
} }
ctx->active++; ctx->active++;
@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
regs->flags &= ~X86_EFLAGS_IF; regs->flags &= ~X86_EFLAGS_IF;
/* Now we set present bit in PTE and single step. */ /* Now we set present bit in PTE and single step. */
disarm_kmmio_fault_page(ctx->fpage->page, NULL); disarm_kmmio_fault_page(ctx->fpage);
/* /*
* If another cpu accesses the same page while we are stepping, * If another cpu accesses the same page while we are stepping,
@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
if (!ctx->active) { if (!ctx->active) {
pr_debug("kmmio: spurious debug trap on CPU %d.\n", pr_warning("kmmio: spurious debug trap on CPU %d.\n",
smp_processor_id()); smp_processor_id());
goto out; goto out;
} }
@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
if (ctx->probe && ctx->probe->post_handler) if (ctx->probe && ctx->probe->post_handler)
ctx->probe->post_handler(ctx->probe, condition, regs); ctx->probe->post_handler(ctx->probe, condition, regs);
arm_kmmio_fault_page(ctx->fpage->page, NULL); /* Prevent racing against release_kmmio_fault_page(). */
spin_lock(&kmmio_lock);
if (ctx->fpage->count)
arm_kmmio_fault_page(ctx->fpage);
spin_unlock(&kmmio_lock);
regs->flags &= ~X86_EFLAGS_TF; regs->flags &= ~X86_EFLAGS_TF;
regs->flags |= ctx->saved_flags; regs->flags |= ctx->saved_flags;
@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page)
f = get_kmmio_fault_page(page); f = get_kmmio_fault_page(page);
if (f) { if (f) {
if (!f->count) if (!f->count)
arm_kmmio_fault_page(f->page, NULL); arm_kmmio_fault_page(f);
f->count++; f->count++;
return 0; return 0;
} }
f = kmalloc(sizeof(*f), GFP_ATOMIC); f = kzalloc(sizeof(*f), GFP_ATOMIC);
if (!f) if (!f)
return -1; return -1;
f->count = 1; f->count = 1;
f->page = page; f->page = page;
list_add_rcu(&f->list, kmmio_page_list(f->page));
arm_kmmio_fault_page(f->page, NULL); if (arm_kmmio_fault_page(f)) {
kfree(f);
return -1;
}
list_add_rcu(&f->list, kmmio_page_list(f->page));
return 0; return 0;
} }
@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page,
f->count--; f->count--;
BUG_ON(f->count < 0); BUG_ON(f->count < 0);
if (!f->count) { if (!f->count) {
disarm_kmmio_fault_page(f->page, NULL); disarm_kmmio_fault_page(f);
f->release_next = *release_list; f->release_next = *release_list;
*release_list = f; *release_list = f;
} }

View file

@ -1,5 +1,5 @@
/* /*
* Written by Pekka Paalanen, 2008 <pq@iki.fi> * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
*/ */
#include <linux/module.h> #include <linux/module.h>
#include <linux/io.h> #include <linux/io.h>
@ -9,35 +9,74 @@
static unsigned long mmio_address; static unsigned long mmio_address;
module_param(mmio_address, ulong, 0); module_param(mmio_address, ulong, 0);
MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
"(or 8 MB if read_far is non-zero).");
static unsigned long read_far = 0x400100;
module_param(read_far, ulong, 0);
MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB "
"(default: 0x400100).");
/*
 * Test pattern for the 16-bit accesses: the value expected at offset i.
 * do_write_test() writes it and do_read_test() compares against it.
 */
static unsigned v16(unsigned i)
{
	unsigned value = 12 * i;

	value += 7;
	return value;
}
/*
 * Test pattern for the 32-bit accesses: the value expected at offset i.
 * do_write_test() writes it and do_read_test() compares against it.
 */
static unsigned v32(unsigned i)
{
	unsigned value = 212371 * i;

	value += 13;
	return value;
}
static void do_write_test(void __iomem *p) static void do_write_test(void __iomem *p)
{ {
unsigned int i; unsigned int i;
pr_info(MODULE_NAME ": write test.\n");
mmiotrace_printk("Write test.\n"); mmiotrace_printk("Write test.\n");
for (i = 0; i < 256; i++) for (i = 0; i < 256; i++)
iowrite8(i, p + i); iowrite8(i, p + i);
for (i = 1024; i < (5 * 1024); i += 2) for (i = 1024; i < (5 * 1024); i += 2)
iowrite16(i * 12 + 7, p + i); iowrite16(v16(i), p + i);
for (i = (5 * 1024); i < (16 * 1024); i += 4) for (i = (5 * 1024); i < (16 * 1024); i += 4)
iowrite32(i * 212371 + 13, p + i); iowrite32(v32(i), p + i);
} }
static void do_read_test(void __iomem *p) static void do_read_test(void __iomem *p)
{ {
unsigned int i; unsigned int i;
unsigned errs[3] = { 0 };
pr_info(MODULE_NAME ": read test.\n");
mmiotrace_printk("Read test.\n"); mmiotrace_printk("Read test.\n");
for (i = 0; i < 256; i++) for (i = 0; i < 256; i++)
ioread8(p + i); if (ioread8(p + i) != i)
++errs[0];
for (i = 1024; i < (5 * 1024); i += 2) for (i = 1024; i < (5 * 1024); i += 2)
ioread16(p + i); if (ioread16(p + i) != v16(i))
++errs[1];
for (i = (5 * 1024); i < (16 * 1024); i += 4) for (i = (5 * 1024); i < (16 * 1024); i += 4)
ioread32(p + i); if (ioread32(p + i) != v32(i))
++errs[2];
mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n",
errs[0], errs[1], errs[2]);
} }
static void do_test(void) static void do_read_far_test(void __iomem *p)
{ {
void __iomem *p = ioremap_nocache(mmio_address, 0x4000); pr_info(MODULE_NAME ": read far test.\n");
mmiotrace_printk("Read far test.\n");
ioread32(p + read_far);
}
static void do_test(unsigned long size)
{
void __iomem *p = ioremap_nocache(mmio_address, size);
if (!p) { if (!p) {
pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
return; return;
@ -45,11 +84,15 @@ static void do_test(void)
mmiotrace_printk("ioremap returned %p.\n", p); mmiotrace_printk("ioremap returned %p.\n", p);
do_write_test(p); do_write_test(p);
do_read_test(p); do_read_test(p);
if (read_far && read_far < size - 4)
do_read_far_test(p);
iounmap(p); iounmap(p);
} }
static int __init init(void) static int __init init(void)
{ {
unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
if (mmio_address == 0) { if (mmio_address == 0) {
pr_err(MODULE_NAME ": you have to use the module argument " pr_err(MODULE_NAME ": you have to use the module argument "
"mmio_address.\n"); "mmio_address.\n");
@ -58,10 +101,11 @@ static int __init init(void)
return -ENXIO; return -ENXIO;
} }
pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
"in PCI address space, and writing " "address space, and writing 16 kB of rubbish in there.\n",
"rubbish in there.\n", mmio_address); size >> 10, mmio_address);
do_test(); do_test(size);
pr_info(MODULE_NAME ": All done.\n");
return 0; return 0;
} }