mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-05 18:41:58 +09:00
RDMA/device: Fix a race between mad_client and cm_client init
[ Upstream commit 7a8bccd8b29c321ac181369b42b04fecf05f98e2 ]
The mad_client will be initialized in enable_device_and_get(), while the
devices_rwsem will be downgraded to a read semaphore. There is a window
that leads to the failed initialization for cm_client, since it can not
get matched mad port from ib_mad_port_list, and the matched mad port will
be added to the list after that.
mad_client | cm_client
------------------|--------------------------------------------------------
ib_register_device|
enable_device_and_get
down_write(&devices_rwsem)
xa_set_mark(&devices, DEVICE_REGISTERED)
downgrade_write(&devices_rwsem)
|
|ib_cm_init
|ib_register_client(&cm_client)
|down_read(&devices_rwsem)
|xa_for_each_marked (&devices, DEVICE_REGISTERED)
|add_client_context
|cm_add_one
|ib_register_mad_agent
|ib_get_mad_port
|__ib_get_mad_port
|list_for_each_entry(entry, &ib_mad_port_list, port_list)
|return NULL
|up_read(&devices_rwsem)
|
add_client_context|
ib_mad_init_device|
ib_mad_port_open |
list_add_tail(&port_priv->port_list, &ib_mad_port_list)
up_read(&devices_rwsem)
|
Fix it by using down_write(&devices_rwsem) in ib_register_client().
Fixes: d0899892ed ("RDMA/device: Provide APIs from the core code to help unregistration")
Link: https://lore.kernel.org/r/20240203035313.98991-1-lishifeng@sangfor.com.cn
Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Shifeng Li <lishifeng@sangfor.com.cn>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
@@ -1730,7 +1730,7 @@ static int assign_client_id(struct ib_client *client)
|
|||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
down_write(&clients_rwsem);
|
lockdep_assert_held(&clients_rwsem);
|
||||||
/*
|
/*
|
||||||
* The add/remove callbacks must be called in FIFO/LIFO order. To
|
* The add/remove callbacks must be called in FIFO/LIFO order. To
|
||||||
* achieve this we assign client_ids so they are sorted in
|
* achieve this we assign client_ids so they are sorted in
|
||||||
@@ -1739,14 +1739,11 @@ static int assign_client_id(struct ib_client *client)
|
|||||||
client->client_id = highest_client_id;
|
client->client_id = highest_client_id;
|
||||||
ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
|
ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
return ret;
|
||||||
|
|
||||||
highest_client_id++;
|
highest_client_id++;
|
||||||
xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
|
xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
|
||||||
|
return 0;
|
||||||
out:
|
|
||||||
up_write(&clients_rwsem);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void remove_client_id(struct ib_client *client)
|
static void remove_client_id(struct ib_client *client)
|
||||||
@@ -1776,25 +1773,35 @@ int ib_register_client(struct ib_client *client)
|
|||||||
{
|
{
|
||||||
struct ib_device *device;
|
struct ib_device *device;
|
||||||
unsigned long index;
|
unsigned long index;
|
||||||
|
bool need_unreg = false;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
refcount_set(&client->uses, 1);
|
refcount_set(&client->uses, 1);
|
||||||
init_completion(&client->uses_zero);
|
init_completion(&client->uses_zero);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The devices_rwsem is held in write mode to ensure that a racing
|
||||||
|
* ib_register_device() sees a consisent view of clients and devices.
|
||||||
|
*/
|
||||||
|
down_write(&devices_rwsem);
|
||||||
|
down_write(&clients_rwsem);
|
||||||
ret = assign_client_id(client);
|
ret = assign_client_id(client);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
goto out;
|
||||||
|
|
||||||
down_read(&devices_rwsem);
|
need_unreg = true;
|
||||||
xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
|
xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
|
||||||
ret = add_client_context(device, client);
|
ret = add_client_context(device, client);
|
||||||
if (ret) {
|
if (ret)
|
||||||
up_read(&devices_rwsem);
|
goto out;
|
||||||
ib_unregister_client(client);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
up_read(&devices_rwsem);
|
ret = 0;
|
||||||
return 0;
|
out:
|
||||||
|
up_write(&clients_rwsem);
|
||||||
|
up_write(&devices_rwsem);
|
||||||
|
if (need_unreg && ret)
|
||||||
|
ib_unregister_client(client);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(ib_register_client);
|
EXPORT_SYMBOL(ib_register_client);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user