xen/evtchn: improve scalability by using per-user locks
The global array of port users and the port_user_lock limits scalability of the evtchn device. Instead of the global array lookup, use a per-user (per-fd) tree of event channels bound by that user and protect the tree with a per-user lock. This is also a prerequisite for extending the number of supported event channels, by removing the fixed size, per-event channel array. Signed-off-by: David Vrabel <david.vrabel@citrix.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
This commit is contained in:
parent
65a45fa2f6
commit
73cc4bb0c7
1 changed file with 112 additions and 80 deletions
|
@ -57,6 +57,7 @@
|
||||||
|
|
||||||
struct per_user_data {
|
struct per_user_data {
|
||||||
struct mutex bind_mutex; /* serialize bind/unbind operations */
|
struct mutex bind_mutex; /* serialize bind/unbind operations */
|
||||||
|
struct rb_root evtchns;
|
||||||
|
|
||||||
/* Notification ring, accessed via /dev/xen/evtchn. */
|
/* Notification ring, accessed via /dev/xen/evtchn. */
|
||||||
#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
|
#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
|
||||||
|
@ -64,6 +65,7 @@ struct per_user_data {
|
||||||
evtchn_port_t *ring;
|
evtchn_port_t *ring;
|
||||||
unsigned int ring_cons, ring_prod, ring_overflow;
|
unsigned int ring_cons, ring_prod, ring_overflow;
|
||||||
struct mutex ring_cons_mutex; /* protect against concurrent readers */
|
struct mutex ring_cons_mutex; /* protect against concurrent readers */
|
||||||
|
spinlock_t ring_prod_lock; /* product against concurrent interrupts */
|
||||||
|
|
||||||
/* Processes wait on this queue when ring is empty. */
|
/* Processes wait on this queue when ring is empty. */
|
||||||
wait_queue_head_t evtchn_wait;
|
wait_queue_head_t evtchn_wait;
|
||||||
|
@ -71,54 +73,79 @@ struct per_user_data {
|
||||||
const char *name;
|
const char *name;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
struct user_evtchn {
|
||||||
* Who's bound to each port? This is logically an array of struct
|
struct rb_node node;
|
||||||
* per_user_data *, but we encode the current enabled-state in bit 0.
|
struct per_user_data *user;
|
||||||
*/
|
unsigned port;
|
||||||
static unsigned long *port_user;
|
bool enabled;
|
||||||
static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
|
};
|
||||||
|
|
||||||
static inline struct per_user_data *get_port_user(unsigned port)
|
static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
|
||||||
{
|
{
|
||||||
return (struct per_user_data *)(port_user[port] & ~1);
|
struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
|
||||||
|
|
||||||
|
while (*new) {
|
||||||
|
struct user_evtchn *this;
|
||||||
|
|
||||||
|
this = container_of(*new, struct user_evtchn, node);
|
||||||
|
|
||||||
|
parent = *new;
|
||||||
|
if (this->port < evtchn->port)
|
||||||
|
new = &((*new)->rb_left);
|
||||||
|
else if (this->port > evtchn->port)
|
||||||
|
new = &((*new)->rb_right);
|
||||||
|
else
|
||||||
|
return -EEXIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add new node and rebalance tree. */
|
||||||
|
rb_link_node(&evtchn->node, parent, new);
|
||||||
|
rb_insert_color(&evtchn->node, &u->evtchns);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void set_port_user(unsigned port, struct per_user_data *u)
|
static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
|
||||||
{
|
{
|
||||||
port_user[port] = (unsigned long)u;
|
rb_erase(&evtchn->node, &u->evtchns);
|
||||||
|
kfree(evtchn);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool get_port_enabled(unsigned port)
|
static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
|
||||||
{
|
{
|
||||||
return port_user[port] & 1;
|
struct rb_node *node = u->evtchns.rb_node;
|
||||||
}
|
|
||||||
|
|
||||||
static inline void set_port_enabled(unsigned port, bool enabled)
|
while (node) {
|
||||||
{
|
struct user_evtchn *evtchn;
|
||||||
if (enabled)
|
|
||||||
port_user[port] |= 1;
|
evtchn = container_of(node, struct user_evtchn, node);
|
||||||
else
|
|
||||||
port_user[port] &= ~1;
|
if (evtchn->port < port)
|
||||||
|
node = node->rb_left;
|
||||||
|
else if (evtchn->port > port)
|
||||||
|
node = node->rb_right;
|
||||||
|
else
|
||||||
|
return evtchn;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static irqreturn_t evtchn_interrupt(int irq, void *data)
|
static irqreturn_t evtchn_interrupt(int irq, void *data)
|
||||||
{
|
{
|
||||||
unsigned int port = (unsigned long)data;
|
struct user_evtchn *evtchn = data;
|
||||||
struct per_user_data *u;
|
struct per_user_data *u = evtchn->user;
|
||||||
|
|
||||||
spin_lock(&port_user_lock);
|
WARN(!evtchn->enabled,
|
||||||
|
|
||||||
u = get_port_user(port);
|
|
||||||
|
|
||||||
WARN(!get_port_enabled(port),
|
|
||||||
"Interrupt for port %d, but apparently not enabled; per-user %p\n",
|
"Interrupt for port %d, but apparently not enabled; per-user %p\n",
|
||||||
port, u);
|
evtchn->port, u);
|
||||||
|
|
||||||
disable_irq_nosync(irq);
|
disable_irq_nosync(irq);
|
||||||
set_port_enabled(port, false);
|
evtchn->enabled = false;
|
||||||
|
|
||||||
|
spin_lock(&u->ring_prod_lock);
|
||||||
|
|
||||||
if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
|
if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
|
||||||
u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
|
u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
|
||||||
wmb(); /* Ensure ring contents visible */
|
wmb(); /* Ensure ring contents visible */
|
||||||
if (u->ring_cons == u->ring_prod++) {
|
if (u->ring_cons == u->ring_prod++) {
|
||||||
wake_up_interruptible(&u->evtchn_wait);
|
wake_up_interruptible(&u->evtchn_wait);
|
||||||
|
@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
|
||||||
} else
|
} else
|
||||||
u->ring_overflow = 1;
|
u->ring_overflow = 1;
|
||||||
|
|
||||||
spin_unlock(&port_user_lock);
|
spin_unlock(&u->ring_prod_lock);
|
||||||
|
|
||||||
return IRQ_HANDLED;
|
return IRQ_HANDLED;
|
||||||
}
|
}
|
||||||
|
@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
|
||||||
if (copy_from_user(kbuf, buf, count) != 0)
|
if (copy_from_user(kbuf, buf, count) != 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
spin_lock_irq(&port_user_lock);
|
mutex_lock(&u->bind_mutex);
|
||||||
|
|
||||||
for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
|
for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
|
||||||
unsigned port = kbuf[i];
|
unsigned port = kbuf[i];
|
||||||
|
struct user_evtchn *evtchn;
|
||||||
|
|
||||||
if (port < NR_EVENT_CHANNELS &&
|
evtchn = find_evtchn(u, port);
|
||||||
get_port_user(port) == u &&
|
if (evtchn && !evtchn->enabled) {
|
||||||
!get_port_enabled(port)) {
|
evtchn->enabled = true;
|
||||||
set_port_enabled(port, true);
|
|
||||||
enable_irq(irq_from_evtchn(port));
|
enable_irq(irq_from_evtchn(port));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_unlock_irq(&port_user_lock);
|
mutex_unlock(&u->bind_mutex);
|
||||||
|
|
||||||
rc = count;
|
rc = count;
|
||||||
|
|
||||||
|
@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
|
||||||
|
|
||||||
static int evtchn_bind_to_user(struct per_user_data *u, int port)
|
static int evtchn_bind_to_user(struct per_user_data *u, int port)
|
||||||
{
|
{
|
||||||
|
struct user_evtchn *evtchn;
|
||||||
|
struct evtchn_close close;
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -263,35 +292,47 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
|
||||||
* interrupt handler yet, and our caller has already
|
* interrupt handler yet, and our caller has already
|
||||||
* serialized bind operations.)
|
* serialized bind operations.)
|
||||||
*/
|
*/
|
||||||
BUG_ON(get_port_user(port) != NULL);
|
|
||||||
set_port_user(port, u);
|
evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
|
||||||
set_port_enabled(port, true); /* start enabled */
|
if (!evtchn)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
evtchn->user = u;
|
||||||
|
evtchn->port = port;
|
||||||
|
evtchn->enabled = true; /* start enabled */
|
||||||
|
|
||||||
|
rc = add_evtchn(u, evtchn);
|
||||||
|
if (rc < 0)
|
||||||
|
goto err;
|
||||||
|
|
||||||
rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
|
rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
|
||||||
u->name, (void *)(unsigned long)port);
|
u->name, evtchn);
|
||||||
if (rc >= 0)
|
if (rc < 0)
|
||||||
rc = evtchn_make_refcounted(port);
|
goto err;
|
||||||
else {
|
|
||||||
/* bind failed, should close the port now */
|
|
||||||
struct evtchn_close close;
|
|
||||||
close.port = port;
|
|
||||||
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
|
|
||||||
BUG();
|
|
||||||
set_port_user(port, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
rc = evtchn_make_refcounted(port);
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
err:
|
||||||
|
/* bind failed, should close the port now */
|
||||||
|
close.port = port;
|
||||||
|
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
|
||||||
|
BUG();
|
||||||
|
del_evtchn(u, evtchn);
|
||||||
|
kfree(evtchn);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void evtchn_unbind_from_user(struct per_user_data *u, int port)
|
static void evtchn_unbind_from_user(struct per_user_data *u,
|
||||||
|
struct user_evtchn *evtchn)
|
||||||
{
|
{
|
||||||
int irq = irq_from_evtchn(port);
|
int irq = irq_from_evtchn(evtchn->port);
|
||||||
|
|
||||||
BUG_ON(irq < 0);
|
BUG_ON(irq < 0);
|
||||||
|
|
||||||
unbind_from_irqhandler(irq, (void *)(unsigned long)port);
|
unbind_from_irqhandler(irq, evtchn);
|
||||||
|
|
||||||
set_port_user(port, NULL);
|
del_evtchn(u, evtchn);
|
||||||
}
|
}
|
||||||
|
|
||||||
static long evtchn_ioctl(struct file *file,
|
static long evtchn_ioctl(struct file *file,
|
||||||
|
@ -370,6 +411,7 @@ static long evtchn_ioctl(struct file *file,
|
||||||
|
|
||||||
case IOCTL_EVTCHN_UNBIND: {
|
case IOCTL_EVTCHN_UNBIND: {
|
||||||
struct ioctl_evtchn_unbind unbind;
|
struct ioctl_evtchn_unbind unbind;
|
||||||
|
struct user_evtchn *evtchn;
|
||||||
|
|
||||||
rc = -EFAULT;
|
rc = -EFAULT;
|
||||||
if (copy_from_user(&unbind, uarg, sizeof(unbind)))
|
if (copy_from_user(&unbind, uarg, sizeof(unbind)))
|
||||||
|
@ -380,29 +422,27 @@ static long evtchn_ioctl(struct file *file,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
rc = -ENOTCONN;
|
rc = -ENOTCONN;
|
||||||
if (get_port_user(unbind.port) != u)
|
evtchn = find_evtchn(u, unbind.port);
|
||||||
|
if (!evtchn)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
disable_irq(irq_from_evtchn(unbind.port));
|
disable_irq(irq_from_evtchn(unbind.port));
|
||||||
|
evtchn_unbind_from_user(u, evtchn);
|
||||||
evtchn_unbind_from_user(u, unbind.port);
|
|
||||||
|
|
||||||
rc = 0;
|
rc = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case IOCTL_EVTCHN_NOTIFY: {
|
case IOCTL_EVTCHN_NOTIFY: {
|
||||||
struct ioctl_evtchn_notify notify;
|
struct ioctl_evtchn_notify notify;
|
||||||
|
struct user_evtchn *evtchn;
|
||||||
|
|
||||||
rc = -EFAULT;
|
rc = -EFAULT;
|
||||||
if (copy_from_user(¬ify, uarg, sizeof(notify)))
|
if (copy_from_user(¬ify, uarg, sizeof(notify)))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (notify.port >= NR_EVENT_CHANNELS) {
|
rc = -ENOTCONN;
|
||||||
rc = -EINVAL;
|
evtchn = find_evtchn(u, notify.port);
|
||||||
} else if (get_port_user(notify.port) != u) {
|
if (evtchn) {
|
||||||
rc = -ENOTCONN;
|
|
||||||
} else {
|
|
||||||
notify_remote_via_evtchn(notify.port);
|
notify_remote_via_evtchn(notify.port);
|
||||||
rc = 0;
|
rc = 0;
|
||||||
}
|
}
|
||||||
|
@ -412,9 +452,9 @@ static long evtchn_ioctl(struct file *file,
|
||||||
case IOCTL_EVTCHN_RESET: {
|
case IOCTL_EVTCHN_RESET: {
|
||||||
/* Initialise the ring to empty. Clear errors. */
|
/* Initialise the ring to empty. Clear errors. */
|
||||||
mutex_lock(&u->ring_cons_mutex);
|
mutex_lock(&u->ring_cons_mutex);
|
||||||
spin_lock_irq(&port_user_lock);
|
spin_lock_irq(&u->ring_prod_lock);
|
||||||
u->ring_cons = u->ring_prod = u->ring_overflow = 0;
|
u->ring_cons = u->ring_prod = u->ring_overflow = 0;
|
||||||
spin_unlock_irq(&port_user_lock);
|
spin_unlock_irq(&u->ring_prod_lock);
|
||||||
mutex_unlock(&u->ring_cons_mutex);
|
mutex_unlock(&u->ring_cons_mutex);
|
||||||
rc = 0;
|
rc = 0;
|
||||||
break;
|
break;
|
||||||
|
@ -473,6 +513,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
|
||||||
|
|
||||||
mutex_init(&u->bind_mutex);
|
mutex_init(&u->bind_mutex);
|
||||||
mutex_init(&u->ring_cons_mutex);
|
mutex_init(&u->ring_cons_mutex);
|
||||||
|
spin_lock_init(&u->ring_prod_lock);
|
||||||
|
|
||||||
filp->private_data = u;
|
filp->private_data = u;
|
||||||
|
|
||||||
|
@ -481,15 +522,15 @@ static int evtchn_open(struct inode *inode, struct file *filp)
|
||||||
|
|
||||||
static int evtchn_release(struct inode *inode, struct file *filp)
|
static int evtchn_release(struct inode *inode, struct file *filp)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
struct per_user_data *u = filp->private_data;
|
struct per_user_data *u = filp->private_data;
|
||||||
|
struct rb_node *node;
|
||||||
|
|
||||||
for (i = 0; i < NR_EVENT_CHANNELS; i++) {
|
while ((node = u->evtchns.rb_node)) {
|
||||||
if (get_port_user(i) != u)
|
struct user_evtchn *evtchn;
|
||||||
continue;
|
|
||||||
|
|
||||||
disable_irq(irq_from_evtchn(i));
|
evtchn = rb_entry(node, struct user_evtchn, node);
|
||||||
evtchn_unbind_from_user(get_port_user(i), i);
|
disable_irq(irq_from_evtchn(evtchn->port));
|
||||||
|
evtchn_unbind_from_user(u, evtchn);
|
||||||
}
|
}
|
||||||
|
|
||||||
free_page((unsigned long)u->ring);
|
free_page((unsigned long)u->ring);
|
||||||
|
@ -523,12 +564,6 @@ static int __init evtchn_init(void)
|
||||||
if (!xen_domain())
|
if (!xen_domain())
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
||||||
port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
|
|
||||||
if (port_user == NULL)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
spin_lock_init(&port_user_lock);
|
|
||||||
|
|
||||||
/* Create '/dev/xen/evtchn'. */
|
/* Create '/dev/xen/evtchn'. */
|
||||||
err = misc_register(&evtchn_miscdev);
|
err = misc_register(&evtchn_miscdev);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
|
@ -543,9 +578,6 @@ static int __init evtchn_init(void)
|
||||||
|
|
||||||
static void __exit evtchn_cleanup(void)
|
static void __exit evtchn_cleanup(void)
|
||||||
{
|
{
|
||||||
kfree(port_user);
|
|
||||||
port_user = NULL;
|
|
||||||
|
|
||||||
misc_deregister(&evtchn_miscdev);
|
misc_deregister(&evtchn_miscdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue