xen/evtchn: improve scalability by using per-user locks

The global array of port users and the port_user_lock limits
scalability of the evtchn device.  Instead of the global array lookup,
use a per-user (per-fd) tree of event channels bound by that user and
protect the tree with a per-user lock.

This is also a prerequisite for extending the number of supported event
channels, by removing the fixed-size, per-event-channel array.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
This commit is contained in:
David Vrabel 2013-07-19 15:52:00 +01:00 committed by Konrad Rzeszutek Wilk
parent 65a45fa2f6
commit 73cc4bb0c7

View file

@ -57,6 +57,7 @@
struct per_user_data { struct per_user_data {
struct mutex bind_mutex; /* serialize bind/unbind operations */ struct mutex bind_mutex; /* serialize bind/unbind operations */
struct rb_root evtchns;
/* Notification ring, accessed via /dev/xen/evtchn. */ /* Notification ring, accessed via /dev/xen/evtchn. */
#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
@ -64,6 +65,7 @@ struct per_user_data {
evtchn_port_t *ring; evtchn_port_t *ring;
unsigned int ring_cons, ring_prod, ring_overflow; unsigned int ring_cons, ring_prod, ring_overflow;
struct mutex ring_cons_mutex; /* protect against concurrent readers */ struct mutex ring_cons_mutex; /* protect against concurrent readers */
spinlock_t ring_prod_lock; /* protect against concurrent interrupts */
/* Processes wait on this queue when ring is empty. */ /* Processes wait on this queue when ring is empty. */
wait_queue_head_t evtchn_wait; wait_queue_head_t evtchn_wait;
@ -71,54 +73,79 @@ struct per_user_data {
const char *name; const char *name;
}; };
/* struct user_evtchn {
* Who's bound to each port? This is logically an array of struct struct rb_node node;
* per_user_data *, but we encode the current enabled-state in bit 0. struct per_user_data *user;
*/ unsigned port;
static unsigned long *port_user; bool enabled;
static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ };
static inline struct per_user_data *get_port_user(unsigned port) static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{ {
return (struct per_user_data *)(port_user[port] & ~1); struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
while (*new) {
struct user_evtchn *this;
this = container_of(*new, struct user_evtchn, node);
parent = *new;
if (this->port < evtchn->port)
new = &((*new)->rb_left);
else if (this->port > evtchn->port)
new = &((*new)->rb_right);
else
return -EEXIST;
}
/* Add new node and rebalance tree. */
rb_link_node(&evtchn->node, parent, new);
rb_insert_color(&evtchn->node, &u->evtchns);
return 0;
} }
static inline void set_port_user(unsigned port, struct per_user_data *u) static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{ {
port_user[port] = (unsigned long)u; rb_erase(&evtchn->node, &u->evtchns);
kfree(evtchn);
} }
static inline bool get_port_enabled(unsigned port) static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
{ {
return port_user[port] & 1; struct rb_node *node = u->evtchns.rb_node;
}
static inline void set_port_enabled(unsigned port, bool enabled) while (node) {
{ struct user_evtchn *evtchn;
if (enabled)
port_user[port] |= 1; evtchn = container_of(node, struct user_evtchn, node);
else
port_user[port] &= ~1; if (evtchn->port < port)
node = node->rb_left;
else if (evtchn->port > port)
node = node->rb_right;
else
return evtchn;
}
return NULL;
} }
static irqreturn_t evtchn_interrupt(int irq, void *data) static irqreturn_t evtchn_interrupt(int irq, void *data)
{ {
unsigned int port = (unsigned long)data; struct user_evtchn *evtchn = data;
struct per_user_data *u; struct per_user_data *u = evtchn->user;
spin_lock(&port_user_lock); WARN(!evtchn->enabled,
u = get_port_user(port);
WARN(!get_port_enabled(port),
"Interrupt for port %d, but apparently not enabled; per-user %p\n", "Interrupt for port %d, but apparently not enabled; per-user %p\n",
port, u); evtchn->port, u);
disable_irq_nosync(irq); disable_irq_nosync(irq);
set_port_enabled(port, false); evtchn->enabled = false;
spin_lock(&u->ring_prod_lock);
if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
wmb(); /* Ensure ring contents visible */ wmb(); /* Ensure ring contents visible */
if (u->ring_cons == u->ring_prod++) { if (u->ring_cons == u->ring_prod++) {
wake_up_interruptible(&u->evtchn_wait); wake_up_interruptible(&u->evtchn_wait);
@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
} else } else
u->ring_overflow = 1; u->ring_overflow = 1;
spin_unlock(&port_user_lock); spin_unlock(&u->ring_prod_lock);
return IRQ_HANDLED; return IRQ_HANDLED;
} }
@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
if (copy_from_user(kbuf, buf, count) != 0) if (copy_from_user(kbuf, buf, count) != 0)
goto out; goto out;
spin_lock_irq(&port_user_lock); mutex_lock(&u->bind_mutex);
for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
unsigned port = kbuf[i]; unsigned port = kbuf[i];
struct user_evtchn *evtchn;
if (port < NR_EVENT_CHANNELS && evtchn = find_evtchn(u, port);
get_port_user(port) == u && if (evtchn && !evtchn->enabled) {
!get_port_enabled(port)) { evtchn->enabled = true;
set_port_enabled(port, true);
enable_irq(irq_from_evtchn(port)); enable_irq(irq_from_evtchn(port));
} }
} }
spin_unlock_irq(&port_user_lock); mutex_unlock(&u->bind_mutex);
rc = count; rc = count;
@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
static int evtchn_bind_to_user(struct per_user_data *u, int port) static int evtchn_bind_to_user(struct per_user_data *u, int port)
{ {
struct user_evtchn *evtchn;
struct evtchn_close close;
int rc = 0; int rc = 0;
/* /*
@ -263,35 +292,47 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
* interrupt handler yet, and our caller has already * interrupt handler yet, and our caller has already
* serialized bind operations.) * serialized bind operations.)
*/ */
BUG_ON(get_port_user(port) != NULL);
set_port_user(port, u); evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
set_port_enabled(port, true); /* start enabled */ if (!evtchn)
return -ENOMEM;
evtchn->user = u;
evtchn->port = port;
evtchn->enabled = true; /* start enabled */
rc = add_evtchn(u, evtchn);
if (rc < 0)
goto err;
rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
u->name, (void *)(unsigned long)port); u->name, evtchn);
if (rc >= 0) if (rc < 0)
rc = evtchn_make_refcounted(port); goto err;
else {
/* bind failed, should close the port now */
struct evtchn_close close;
close.port = port;
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
BUG();
set_port_user(port, NULL);
}
rc = evtchn_make_refcounted(port);
return rc;
err:
/* bind failed, should close the port now */
close.port = port;
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
BUG();
del_evtchn(u, evtchn);
kfree(evtchn);
return rc; return rc;
} }
static void evtchn_unbind_from_user(struct per_user_data *u, int port) static void evtchn_unbind_from_user(struct per_user_data *u,
struct user_evtchn *evtchn)
{ {
int irq = irq_from_evtchn(port); int irq = irq_from_evtchn(evtchn->port);
BUG_ON(irq < 0); BUG_ON(irq < 0);
unbind_from_irqhandler(irq, (void *)(unsigned long)port); unbind_from_irqhandler(irq, evtchn);
set_port_user(port, NULL); del_evtchn(u, evtchn);
} }
static long evtchn_ioctl(struct file *file, static long evtchn_ioctl(struct file *file,
@ -370,6 +411,7 @@ static long evtchn_ioctl(struct file *file,
case IOCTL_EVTCHN_UNBIND: { case IOCTL_EVTCHN_UNBIND: {
struct ioctl_evtchn_unbind unbind; struct ioctl_evtchn_unbind unbind;
struct user_evtchn *evtchn;
rc = -EFAULT; rc = -EFAULT;
if (copy_from_user(&unbind, uarg, sizeof(unbind))) if (copy_from_user(&unbind, uarg, sizeof(unbind)))
@ -380,29 +422,27 @@ static long evtchn_ioctl(struct file *file,
break; break;
rc = -ENOTCONN; rc = -ENOTCONN;
if (get_port_user(unbind.port) != u) evtchn = find_evtchn(u, unbind.port);
if (!evtchn)
break; break;
disable_irq(irq_from_evtchn(unbind.port)); disable_irq(irq_from_evtchn(unbind.port));
evtchn_unbind_from_user(u, evtchn);
evtchn_unbind_from_user(u, unbind.port);
rc = 0; rc = 0;
break; break;
} }
case IOCTL_EVTCHN_NOTIFY: { case IOCTL_EVTCHN_NOTIFY: {
struct ioctl_evtchn_notify notify; struct ioctl_evtchn_notify notify;
struct user_evtchn *evtchn;
rc = -EFAULT; rc = -EFAULT;
if (copy_from_user(&notify, uarg, sizeof(notify))) if (copy_from_user(&notify, uarg, sizeof(notify)))
break; break;
if (notify.port >= NR_EVENT_CHANNELS) { rc = -ENOTCONN;
rc = -EINVAL; evtchn = find_evtchn(u, notify.port);
} else if (get_port_user(notify.port) != u) { if (evtchn) {
rc = -ENOTCONN;
} else {
notify_remote_via_evtchn(notify.port); notify_remote_via_evtchn(notify.port);
rc = 0; rc = 0;
} }
@ -412,9 +452,9 @@ static long evtchn_ioctl(struct file *file,
case IOCTL_EVTCHN_RESET: { case IOCTL_EVTCHN_RESET: {
/* Initialise the ring to empty. Clear errors. */ /* Initialise the ring to empty. Clear errors. */
mutex_lock(&u->ring_cons_mutex); mutex_lock(&u->ring_cons_mutex);
spin_lock_irq(&port_user_lock); spin_lock_irq(&u->ring_prod_lock);
u->ring_cons = u->ring_prod = u->ring_overflow = 0; u->ring_cons = u->ring_prod = u->ring_overflow = 0;
spin_unlock_irq(&port_user_lock); spin_unlock_irq(&u->ring_prod_lock);
mutex_unlock(&u->ring_cons_mutex); mutex_unlock(&u->ring_cons_mutex);
rc = 0; rc = 0;
break; break;
@ -473,6 +513,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
mutex_init(&u->bind_mutex); mutex_init(&u->bind_mutex);
mutex_init(&u->ring_cons_mutex); mutex_init(&u->ring_cons_mutex);
spin_lock_init(&u->ring_prod_lock);
filp->private_data = u; filp->private_data = u;
@ -481,15 +522,15 @@ static int evtchn_open(struct inode *inode, struct file *filp)
static int evtchn_release(struct inode *inode, struct file *filp) static int evtchn_release(struct inode *inode, struct file *filp)
{ {
int i;
struct per_user_data *u = filp->private_data; struct per_user_data *u = filp->private_data;
struct rb_node *node;
for (i = 0; i < NR_EVENT_CHANNELS; i++) { while ((node = u->evtchns.rb_node)) {
if (get_port_user(i) != u) struct user_evtchn *evtchn;
continue;
disable_irq(irq_from_evtchn(i)); evtchn = rb_entry(node, struct user_evtchn, node);
evtchn_unbind_from_user(get_port_user(i), i); disable_irq(irq_from_evtchn(evtchn->port));
evtchn_unbind_from_user(u, evtchn);
} }
free_page((unsigned long)u->ring); free_page((unsigned long)u->ring);
@ -523,12 +564,6 @@ static int __init evtchn_init(void)
if (!xen_domain()) if (!xen_domain())
return -ENODEV; return -ENODEV;
port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
if (port_user == NULL)
return -ENOMEM;
spin_lock_init(&port_user_lock);
/* Create '/dev/xen/evtchn'. */ /* Create '/dev/xen/evtchn'. */
err = misc_register(&evtchn_miscdev); err = misc_register(&evtchn_miscdev);
if (err != 0) { if (err != 0) {
@ -543,9 +578,6 @@ static int __init evtchn_init(void)
static void __exit evtchn_cleanup(void) static void __exit evtchn_cleanup(void)
{ {
kfree(port_user);
port_user = NULL;
misc_deregister(&evtchn_miscdev); misc_deregister(&evtchn_miscdev);
} }