IB/ipoib: Get rid of the sysfs_mutex
This mutex was introduced to deal with the deadlock formed by calling unregister_netdev from within the sysfs callback of a netdev. Now that we have priv_destructor and needs_free_netdev, we can switch to the more targeted solution of running the unregister from a work queue. This avoids the deadlock and gets rid of the mutex. The next patch in the series needs this mutex eliminated in order to make unregistration atomic. Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
This commit is contained in:
parent
9f49a5b5c2
commit
ee190ab734
4 changed files with 78 additions and 61 deletions
|
@ -332,7 +332,6 @@ struct ipoib_dev_priv {
|
||||||
|
|
||||||
struct rw_semaphore vlan_rwsem;
|
struct rw_semaphore vlan_rwsem;
|
||||||
struct mutex mcast_mutex;
|
struct mutex mcast_mutex;
|
||||||
struct mutex sysfs_mutex;
|
|
||||||
|
|
||||||
struct rb_root path_tree;
|
struct rb_root path_tree;
|
||||||
struct list_head path_list;
|
struct list_head path_list;
|
||||||
|
|
|
@ -1517,19 +1517,13 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
|
||||||
{
|
{
|
||||||
struct net_device *dev = to_net_dev(d);
|
struct net_device *dev = to_net_dev(d);
|
||||||
int ret;
|
int ret;
|
||||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
|
||||||
|
|
||||||
if (!mutex_trylock(&priv->sysfs_mutex))
|
|
||||||
return restart_syscall();
|
|
||||||
|
|
||||||
if (!rtnl_trylock()) {
|
if (!rtnl_trylock()) {
|
||||||
mutex_unlock(&priv->sysfs_mutex);
|
|
||||||
return restart_syscall();
|
return restart_syscall();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dev->reg_state != NETREG_REGISTERED) {
|
if (dev->reg_state != NETREG_REGISTERED) {
|
||||||
rtnl_unlock();
|
rtnl_unlock();
|
||||||
mutex_unlock(&priv->sysfs_mutex);
|
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1541,7 +1535,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
|
||||||
*/
|
*/
|
||||||
if (ret != -EBUSY)
|
if (ret != -EBUSY)
|
||||||
rtnl_unlock();
|
rtnl_unlock();
|
||||||
mutex_unlock(&priv->sysfs_mutex);
|
|
||||||
|
|
||||||
return (!ret || ret == -EBUSY) ? count : ret;
|
return (!ret || ret == -EBUSY) ? count : ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2079,7 +2079,6 @@ static void ipoib_build_priv(struct net_device *dev)
|
||||||
spin_lock_init(&priv->lock);
|
spin_lock_init(&priv->lock);
|
||||||
init_rwsem(&priv->vlan_rwsem);
|
init_rwsem(&priv->vlan_rwsem);
|
||||||
mutex_init(&priv->mcast_mutex);
|
mutex_init(&priv->mcast_mutex);
|
||||||
mutex_init(&priv->sysfs_mutex);
|
|
||||||
|
|
||||||
INIT_LIST_HEAD(&priv->path_list);
|
INIT_LIST_HEAD(&priv->path_list);
|
||||||
INIT_LIST_HEAD(&priv->child_intfs);
|
INIT_LIST_HEAD(&priv->child_intfs);
|
||||||
|
@ -2476,10 +2475,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
|
||||||
list_for_each_entry_safe(priv, tmp, dev_list, list) {
|
list_for_each_entry_safe(priv, tmp, dev_list, list) {
|
||||||
ipoib_parent_unregister_pre(priv->dev);
|
ipoib_parent_unregister_pre(priv->dev);
|
||||||
|
|
||||||
/* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */
|
|
||||||
mutex_lock(&priv->sysfs_mutex);
|
|
||||||
unregister_netdev(priv->dev);
|
unregister_netdev(priv->dev);
|
||||||
mutex_unlock(&priv->sysfs_mutex);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
kfree(dev_list);
|
kfree(dev_list);
|
||||||
|
@ -2527,8 +2523,7 @@ static int __init ipoib_init_module(void)
|
||||||
* its private workqueue, and we only queue up flush events
|
* its private workqueue, and we only queue up flush events
|
||||||
* on our global flush workqueue. This avoids the deadlocks.
|
* on our global flush workqueue. This avoids the deadlocks.
|
||||||
*/
|
*/
|
||||||
ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush",
|
ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 0);
|
||||||
WQ_MEM_RECLAIM);
|
|
||||||
if (!ipoib_workqueue) {
|
if (!ipoib_workqueue) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto err_fs;
|
goto err_fs;
|
||||||
|
|
|
@ -125,23 +125,16 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
|
||||||
snprintf(intf_name, sizeof(intf_name), "%s.%04x",
|
snprintf(intf_name, sizeof(intf_name), "%s.%04x",
|
||||||
ppriv->dev->name, pkey);
|
ppriv->dev->name, pkey);
|
||||||
|
|
||||||
if (!mutex_trylock(&ppriv->sysfs_mutex))
|
if (!rtnl_trylock())
|
||||||
return restart_syscall();
|
return restart_syscall();
|
||||||
|
|
||||||
if (!rtnl_trylock()) {
|
|
||||||
mutex_unlock(&ppriv->sysfs_mutex);
|
|
||||||
return restart_syscall();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pdev->reg_state != NETREG_REGISTERED) {
|
if (pdev->reg_state != NETREG_REGISTERED) {
|
||||||
rtnl_unlock();
|
rtnl_unlock();
|
||||||
mutex_unlock(&ppriv->sysfs_mutex);
|
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!down_write_trylock(&ppriv->vlan_rwsem)) {
|
if (!down_write_trylock(&ppriv->vlan_rwsem)) {
|
||||||
rtnl_unlock();
|
rtnl_unlock();
|
||||||
mutex_unlock(&ppriv->sysfs_mutex);
|
|
||||||
return restart_syscall();
|
return restart_syscall();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -178,58 +171,95 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
|
||||||
out:
|
out:
|
||||||
up_write(&ppriv->vlan_rwsem);
|
up_write(&ppriv->vlan_rwsem);
|
||||||
rtnl_unlock();
|
rtnl_unlock();
|
||||||
mutex_unlock(&ppriv->sysfs_mutex);
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
|
struct ipoib_vlan_delete_work {
|
||||||
|
struct work_struct work;
|
||||||
|
struct net_device *dev;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sysfs callbacks of a netdevice cannot obtain the rtnl lock as
|
||||||
|
* unregister_netdev ultimately deletes the sysfs files while holding the rtnl
|
||||||
|
* lock. This deadlocks the system.
|
||||||
|
*
|
||||||
|
* A callback can use rtnl_trylock to avoid the deadlock but it cannot call
|
||||||
|
* unregister_netdev as that internally takes and releases the rtnl_lock. So
|
||||||
|
* instead we find the netdev to unregister and then do the actual unregister
|
||||||
|
* from the global work queue where we can obtain the rtnl_lock safely.
|
||||||
|
*/
|
||||||
|
static void ipoib_vlan_delete_task(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct ipoib_dev_priv *ppriv, *priv, *tpriv;
|
struct ipoib_vlan_delete_work *pwork =
|
||||||
struct net_device *dev = NULL;
|
container_of(work, struct ipoib_vlan_delete_work, work);
|
||||||
|
struct net_device *dev = pwork->dev;
|
||||||
|
|
||||||
if (!capable(CAP_NET_ADMIN))
|
rtnl_lock();
|
||||||
return -EPERM;
|
|
||||||
|
|
||||||
ppriv = ipoib_priv(pdev);
|
/* Unregistering tasks can race with another task or parent removal */
|
||||||
|
if (dev->reg_state == NETREG_REGISTERED) {
|
||||||
|
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||||
|
struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
|
||||||
|
|
||||||
if (!mutex_trylock(&ppriv->sysfs_mutex))
|
down_write(&ppriv->vlan_rwsem);
|
||||||
return restart_syscall();
|
list_del(&priv->list);
|
||||||
|
up_write(&ppriv->vlan_rwsem);
|
||||||
|
|
||||||
if (!rtnl_trylock()) {
|
|
||||||
mutex_unlock(&ppriv->sysfs_mutex);
|
|
||||||
return restart_syscall();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pdev->reg_state != NETREG_REGISTERED) {
|
|
||||||
rtnl_unlock();
|
|
||||||
mutex_unlock(&ppriv->sysfs_mutex);
|
|
||||||
return -EPERM;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!down_write_trylock(&ppriv->vlan_rwsem)) {
|
|
||||||
rtnl_unlock();
|
|
||||||
mutex_unlock(&ppriv->sysfs_mutex);
|
|
||||||
return restart_syscall();
|
|
||||||
}
|
|
||||||
|
|
||||||
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
|
|
||||||
if (priv->pkey == pkey &&
|
|
||||||
priv->child_type == IPOIB_LEGACY_CHILD) {
|
|
||||||
list_del(&priv->list);
|
|
||||||
dev = priv->dev;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
up_write(&ppriv->vlan_rwsem);
|
|
||||||
|
|
||||||
if (dev) {
|
|
||||||
ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
|
ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
|
||||||
unregister_netdevice(dev);
|
unregister_netdevice(dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
rtnl_unlock();
|
rtnl_unlock();
|
||||||
mutex_unlock(&ppriv->sysfs_mutex);
|
|
||||||
|
|
||||||
return (dev) ? 0 : -ENODEV;
|
kfree(pwork);
|
||||||
|
}
|
||||||
|
|
||||||
|
int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
|
||||||
|
{
|
||||||
|
struct ipoib_dev_priv *ppriv, *priv, *tpriv;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
if (!capable(CAP_NET_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
|
||||||
|
if (!rtnl_trylock())
|
||||||
|
return restart_syscall();
|
||||||
|
|
||||||
|
if (pdev->reg_state != NETREG_REGISTERED) {
|
||||||
|
rtnl_unlock();
|
||||||
|
return -EPERM;
|
||||||
|
}
|
||||||
|
|
||||||
|
ppriv = ipoib_priv(pdev);
|
||||||
|
|
||||||
|
rc = -ENODEV;
|
||||||
|
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
|
||||||
|
if (priv->pkey == pkey &&
|
||||||
|
priv->child_type == IPOIB_LEGACY_CHILD) {
|
||||||
|
struct ipoib_vlan_delete_work *work;
|
||||||
|
|
||||||
|
work = kmalloc(sizeof(*work), GFP_KERNEL);
|
||||||
|
if (!work) {
|
||||||
|
rc = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
down_write(&ppriv->vlan_rwsem);
|
||||||
|
list_del_init(&priv->list);
|
||||||
|
up_write(&ppriv->vlan_rwsem);
|
||||||
|
work->dev = priv->dev;
|
||||||
|
INIT_WORK(&work->work, ipoib_vlan_delete_task);
|
||||||
|
queue_work(ipoib_workqueue, &work->work);
|
||||||
|
|
||||||
|
rc = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
rtnl_unlock();
|
||||||
|
|
||||||
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue