diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index c188e4f20ca3..97e51c34e6cf 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -971,6 +971,10 @@ static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) ccw->flags = 0; ccw->count = sizeof(status); ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status; + /* We use ssch for setting the status which is a serializing + * instruction that guarantees the memory writes have + * completed before ssch. + */ ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS); /* Write failed? We assume status is unchanged. */ if (ret) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 750e5f23406d..0a5670729412 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -771,7 +771,6 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, } ifcvf_mgmt_dev->adapter = adapter; - pci_set_drvdata(pdev, ifcvf_mgmt_dev); vf = &adapter->vf; vf->dev_type = get_dev_type(pdev); @@ -886,6 +885,8 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err; } + pci_set_drvdata(pdev, ifcvf_mgmt_dev); + return 0; err: diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index dcca782c698e..b7a955479156 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -48,6 +48,8 @@ MODULE_LICENSE("Dual BSD/GPL"); #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature))) +#define MLX5V_UNTAGGED 0x1000 + struct mlx5_vdpa_net_resources { u32 tisn; u32 tdn; @@ -144,6 +146,8 @@ static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) return idx <= mvdev->max_idx; } +#define MLX5V_MACVLAN_SIZE 256 + struct mlx5_vdpa_net { struct mlx5_vdpa_dev mvdev; struct mlx5_vdpa_net_resources res; @@ -157,15 +161,20 @@ struct mlx5_vdpa_net { */ struct rw_semaphore reslock; struct mlx5_flow_table *rxft; - struct mlx5_fc *rx_counter; - struct mlx5_flow_handle *rx_rule_ucast; - struct mlx5_flow_handle *rx_rule_mcast; bool setup; u32 cur_num_vqs; u32 rqt_size; struct notifier_block nb; struct vdpa_callback config_cb; struct mlx5_vdpa_wq_ent cvq_ent; + struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE]; +}; + +struct macvlan_node { + struct hlist_node hlist; + struct mlx5_flow_handle *ucast_rule; + struct mlx5_flow_handle *mcast_rule; + u64 macvlan; }; static void free_resources(struct mlx5_vdpa_net *ndev); @@ -1404,12 +1413,17 @@ static void destroy_tir(struct mlx5_vdpa_net *ndev) mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); } -static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) +#define MAX_STEERING_ENT 0x8000 +#define MAX_STEERING_GROUPS 2 + +static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, + u16 vid, bool tagged, + struct mlx5_flow_handle **ucast, + struct mlx5_flow_handle **mcast) { - struct mlx5_flow_destination dest[2] = {}; - struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; void *headers_c; void *headers_v; @@ -1422,85 +1436,178 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) return -ENOMEM; spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - ft_attr.max_fte = 2; - ft_attr.autogroup.max_num_groups = 2; - - ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); - if (!ns) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); - err = -EOPNOTSUPP; - goto err_ns; - } - - ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(ndev->rxft)) { - err = PTR_ERR(ndev->rxft); - goto err_ns; - } - - ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false); - if (IS_ERR(ndev->rx_counter)) { - err = PTR_ERR(ndev->rx_counter); - goto err_fc; - } - headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); - dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); - memset(dmac_c, 0xff, ETH_ALEN); headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); - ether_addr_copy(dmac_v, ndev->config.mac); - - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest[0].tir_num = ndev->res.tirn; - dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[1].counter_id = mlx5_fc_id(ndev->rx_counter); - ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2); - - if (IS_ERR(ndev->rx_rule_ucast)) { - err = PTR_ERR(ndev->rx_rule_ucast); - ndev->rx_rule_ucast = NULL; - goto err_rule_ucast; + memset(dmac_c, 0xff, ETH_ALEN); + ether_addr_copy(dmac_v, mac); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + if (tagged) { + MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid); } + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = ndev->res.tirn; + rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + *ucast = rule; memset(dmac_c, 0, ETH_ALEN); memset(dmac_v, 0, ETH_ALEN); dmac_c[0] = 1; dmac_v[0] = 1; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1); - if (IS_ERR(ndev->rx_rule_mcast)) { - err = PTR_ERR(ndev->rx_rule_mcast); - ndev->rx_rule_mcast = NULL; - goto err_rule_mcast; + rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); + kvfree(spec); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_mcast; } - kvfree(spec); + *mcast = rule; return 0; -err_rule_mcast: - mlx5_del_flow_rules(ndev->rx_rule_ucast); - ndev->rx_rule_ucast = NULL; -err_rule_ucast: - mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); -err_fc: - mlx5_destroy_flow_table(ndev->rxft); -err_ns: - kvfree(spec); +err_mcast: + mlx5_del_flow_rules(*ucast); return err; } -static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) +static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, + struct mlx5_flow_handle *ucast, + struct mlx5_flow_handle *mcast) { - if (!ndev->rx_rule_ucast) + mlx5_del_flow_rules(ucast); + mlx5_del_flow_rules(mcast); +} + +static u64 search_val(u8 *mac, u16 vlan, bool tagged) +{ + u64 val; + + if (!tagged) + vlan = MLX5V_UNTAGGED; + + val = (u64)vlan << 48 | + (u64)mac[0] << 40 | + (u64)mac[1] << 32 | + (u64)mac[2] << 24 | + (u64)mac[3] << 16 | + (u64)mac[4] << 8 | + (u64)mac[5]; + + return val; +} + +static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) +{ + struct macvlan_node *pos; + u32 idx; + + idx = hash_64(value, 8); // tbd 8 + hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { + if (pos->macvlan == value) + return pos; + } + return NULL; +} + +static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid +{ + struct macvlan_node *ptr; + u64 val; + u32 idx; + int err; + + val = search_val(mac, vlan, tagged); + if (mac_vlan_lookup(ndev, val)) + return -EEXIST; + + ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged, + &ptr->ucast_rule, &ptr->mcast_rule); + if (err) + goto err_add; + + ptr->macvlan = val; + idx = hash_64(val, 8); + hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); + return 0; + +err_add: + kfree(ptr); + return err; +} + +static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) +{ + struct macvlan_node *ptr; + + ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); + if (!ptr) return; - mlx5_del_flow_rules(ndev->rx_rule_mcast); - ndev->rx_rule_mcast = NULL; - mlx5_del_flow_rules(ndev->rx_rule_ucast); - ndev->rx_rule_ucast = NULL; - mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); + hlist_del(&ptr->hlist); + mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule); + kfree(ptr); +} + +static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) +{ + struct macvlan_node *pos; + struct hlist_node *n; + int i; + + for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { + hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { + hlist_del(&pos->hlist); + mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule); + kfree(pos); + } + } +} + +static int setup_steering(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err; + + ft_attr.max_fte = MAX_STEERING_ENT; + ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; + + ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); + if (!ns) { + mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); + return -EOPNOTSUPP; + } + + ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ndev->rxft)) { + mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); + return PTR_ERR(ndev->rxft); + } + + err = mac_vlan_add(ndev, ndev->config.mac, 0, false); + if (err) + goto err_add; + + return 0; + +err_add: + mlx5_destroy_flow_table(ndev->rxft); + return err; +} + +static void teardown_steering(struct mlx5_vdpa_net *ndev) +{ + clear_mac_vlan_table(ndev); mlx5_destroy_flow_table(ndev->rxft); } @@ -1551,9 +1658,9 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) /* Need recreate the flow table entry, so that the packet could forward back */ - remove_fwd_to_tir(ndev); + mac_vlan_del(ndev, ndev->config.mac, 0, false); - if (add_fwd_to_tir(ndev)) { + if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) { mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n"); /* Although it hardly run here, we still need double check */ @@ -1577,7 +1684,7 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) memcpy(ndev->config.mac, mac_back, ETH_ALEN); - if (add_fwd_to_tir(ndev)) + if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); break; @@ -1679,6 +1786,42 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) return status; } +static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) +{ + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + struct mlx5_control_vq *cvq = &mvdev->cvq; + __virtio16 vlan; + size_t read; + u16 id; + + switch (cmd) { + case VIRTIO_NET_CTRL_VLAN_ADD: + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); + if (read != sizeof(vlan)) + break; + + id = mlx5vdpa16_to_cpu(mvdev, vlan); + if (mac_vlan_add(ndev, ndev->config.mac, id, true)) + break; + + status = VIRTIO_NET_OK; + break; + case VIRTIO_NET_CTRL_VLAN_DEL: + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); + if (read != sizeof(vlan)) + break; + + id = mlx5vdpa16_to_cpu(mvdev, vlan); + mac_vlan_del(ndev, ndev->config.mac, id, true); + break; + default: + break; +} + +return status; +} + static void mlx5_cvq_kick_handler(struct work_struct *work) { virtio_net_ctrl_ack status = VIRTIO_NET_ERR; @@ -1724,7 +1867,9 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) case VIRTIO_NET_CTRL_MQ: status = handle_ctrl_mq(mvdev, ctrl.cmd); break; - + case VIRTIO_NET_CTRL_VLAN: + status = handle_ctrl_vlan(mvdev, ctrl.cmd); + break; default: break; } @@ -1989,6 +2134,7 @@ static u64 get_supported_features(struct mlx5_core_dev *mdev) mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); + mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); return mlx_vdpa_features; } @@ -2274,9 +2420,9 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) goto err_tir; } - err = add_fwd_to_tir(ndev); + err = setup_steering(ndev); if (err) { - mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n"); + mlx5_vdpa_warn(mvdev, "setup_steering\n"); goto err_fwd; } ndev->setup = true; @@ -2302,7 +2448,7 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev) if (!ndev->setup) return; - remove_fwd_to_tir(ndev); + teardown_steering(ndev); destroy_tir(ndev); destroy_rqt(ndev); teardown_virtqueues(ndev); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 50d721072beb..0f2865899647 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -567,7 +567,7 @@ static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group, if (group > vdpasim->dev_attr.ngroups) return -EINVAL; - if (asid > vdpasim->dev_attr.nas) + if (asid >= vdpasim->dev_attr.nas) return -EINVAL; iommu = &vdpasim->iommu[asid]; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 297b5db47454..68e4ecd1cc0e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1374,16 +1374,9 @@ static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq); } -static void vhost_net_flush_vq(struct vhost_net *n, int index) -{ - vhost_poll_flush(n->poll + index); - vhost_poll_flush(&n->vqs[index].vq.poll); -} - static void vhost_net_flush(struct vhost_net *n) { - vhost_net_flush_vq(n, VHOST_NET_VQ_TX); - vhost_net_flush_vq(n, VHOST_NET_VQ_RX); + vhost_dev_flush(&n->dev); if (n->vqs[VHOST_NET_VQ_TX].ubufs) { mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = true; @@ -1572,7 +1565,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } if (oldsock) { - vhost_net_flush_vq(n, index); + vhost_dev_flush(&n->dev); sockfd_put(oldsock); } diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 532e204f2b1b..ffd9e6c2ffc1 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1436,7 +1436,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs) kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight); /* Flush both the vhost poll and vhost work */ - vhost_work_dev_flush(&vs->dev); + vhost_dev_flush(&vs->dev); /* Wait for all reqs issued before the flush to be finished */ for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) @@ -1827,8 +1827,6 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) vhost_scsi_clear_endpoint(vs, &t); vhost_dev_stop(&vs->dev); vhost_dev_cleanup(&vs->dev); - /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */ - vhost_scsi_flush(vs); kfree(vs->dev.vqs); kvfree(vs); return 0; diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 05740cba1cd8..bc8e7fb1e635 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -144,14 +144,9 @@ static void vhost_test_stop(struct vhost_test *n, void **privatep) *privatep = vhost_test_stop_vq(n, n->vqs + VHOST_TEST_VQ); } -static void vhost_test_flush_vq(struct vhost_test *n, int index) -{ - vhost_poll_flush(&n->vqs[index].poll); -} - static void vhost_test_flush(struct vhost_test *n) { - vhost_test_flush_vq(n, VHOST_TEST_VQ); + vhost_dev_flush(&n->dev); } static int vhost_test_release(struct inode *inode, struct file *f) @@ -163,9 +158,6 @@ static int vhost_test_release(struct inode *inode, struct file *f) vhost_test_flush(n); vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev); - /* We do an extra flush before freeing memory, - * since jobs can re-queue themselves. */ - vhost_test_flush(n); kfree(n->dev.vqs); kfree(n); return 0; @@ -210,7 +202,7 @@ static long vhost_test_run(struct vhost_test *n, int test) goto err; if (oldpriv) { - vhost_test_flush_vq(n, index); + vhost_test_flush(n); } } @@ -303,7 +295,7 @@ static long vhost_test_set_backend(struct vhost_test *n, unsigned index, int fd) mutex_unlock(&vq->mutex); if (enable) { - vhost_test_flush_vq(n, index); + vhost_test_flush(n); } mutex_unlock(&n->dev.mutex); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 3e86080041fc..935a1d0ddb97 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -609,11 +609,13 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, r = vhost_vdpa_get_vring_num(v, argp); break; case VHOST_VDPA_GET_GROUP_NUM: - r = copy_to_user(argp, &v->vdpa->ngroups, - sizeof(v->vdpa->ngroups)); + if (copy_to_user(argp, &v->vdpa->ngroups, + sizeof(v->vdpa->ngroups))) + r = -EFAULT; break; case VHOST_VDPA_GET_AS_NUM: - r = copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)); + if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas))) + r = -EFAULT; break; case VHOST_SET_LOG_BASE: case VHOST_SET_LOG_FD: diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5022c648d9c0..40097826cff0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -231,7 +231,7 @@ void vhost_poll_stop(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_stop); -void vhost_work_dev_flush(struct vhost_dev *dev) +void vhost_dev_flush(struct vhost_dev *dev) { struct vhost_flush_struct flush; @@ -243,15 +243,7 @@ void vhost_work_dev_flush(struct vhost_dev *dev) wait_for_completion(&flush.wait_event); } } -EXPORT_SYMBOL_GPL(vhost_work_dev_flush); - -/* Flush any work that has been scheduled. When calling this, don't hold any - * locks that are also used by the callback. */ -void vhost_poll_flush(struct vhost_poll *poll) -{ - vhost_work_dev_flush(poll->dev); -} -EXPORT_SYMBOL_GPL(vhost_poll_flush); +EXPORT_SYMBOL_GPL(vhost_dev_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { @@ -538,7 +530,7 @@ static int vhost_attach_cgroups(struct vhost_dev *dev) attach.owner = current; vhost_work_init(&attach.work, vhost_attach_cgroups_work); vhost_work_queue(dev, &attach.work); - vhost_work_dev_flush(dev); + vhost_dev_flush(dev); return attach.ret; } @@ -661,11 +653,11 @@ void vhost_dev_stop(struct vhost_dev *dev) int i; for (i = 0; i < dev->nvqs; ++i) { - if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) { + if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) vhost_poll_stop(&dev->vqs[i]->poll); - vhost_poll_flush(&dev->vqs[i]->poll); - } } + + vhost_dev_flush(dev); } EXPORT_SYMBOL_GPL(vhost_dev_stop); @@ -1732,7 +1724,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg mutex_unlock(&vq->mutex); if (pollstop && vq->handle_kick) - vhost_poll_flush(&vq->poll); + vhost_dev_flush(vq->poll.dev); return r; } EXPORT_SYMBOL_GPL(vhost_vring_ioctl); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 9f238d6c7b58..d9109107af08 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -44,9 +44,8 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, __poll_t mask, struct vhost_dev *dev); int vhost_poll_start(struct vhost_poll *poll, struct file *file); void vhost_poll_stop(struct vhost_poll *poll); -void vhost_poll_flush(struct vhost_poll *poll); void vhost_poll_queue(struct vhost_poll *poll); -void vhost_work_dev_flush(struct vhost_dev *dev); +void vhost_dev_flush(struct vhost_dev *dev); struct vhost_log { u64 addr; diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index e6c9d41db1de..368330417bde 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -705,12 +705,7 @@ out: static void vhost_vsock_flush(struct vhost_vsock *vsock) { - int i; - - for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) - if (vsock->vqs[i].handle_kick) - vhost_poll_flush(&vsock->vqs[i].poll); - vhost_work_dev_flush(&vsock->dev); + vhost_dev_flush(&vsock->dev); } static void vhost_vsock_reset_orphans(struct sock *sk) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index aa1eb5132767..ef04a96942bf 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -220,6 +220,15 @@ static int virtio_features_ok(struct virtio_device *dev) * */ void virtio_reset_device(struct virtio_device *dev) { + /* + * The below virtio_synchronize_cbs() guarantees that any + * interrupt for this line arriving after + * virtio_synchronize_vqs() has completed is guaranteed to see + * vq->broken as true. + */ + virtio_break_device(dev); + virtio_synchronize_cbs(dev); + dev->config->reset(dev); } EXPORT_SYMBOL_GPL(virtio_reset_device); @@ -413,7 +422,7 @@ int register_virtio_device(struct virtio_device *dev) device_initialize(&dev->dev); /* Assign a unique device index and hence name. */ - err = ida_simple_get(&virtio_index_ida, 0, 0, GFP_KERNEL); + err = ida_alloc(&virtio_index_ida, GFP_KERNEL); if (err < 0) goto out; @@ -428,6 +437,9 @@ int register_virtio_device(struct virtio_device *dev) dev->config_enabled = false; dev->config_change_pending = false; + INIT_LIST_HEAD(&dev->vqs); + spin_lock_init(&dev->vqs_list_lock); + /* We always start by resetting the device, in case a previous * driver messed it up. This also tests that code path a little. */ virtio_reset_device(dev); @@ -435,9 +447,6 @@ int register_virtio_device(struct virtio_device *dev) /* Acknowledge that we've seen the device. */ virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); - INIT_LIST_HEAD(&dev->vqs); - spin_lock_init(&dev->vqs_list_lock); - /* * device_add() causes the bus infrastructure to look for a matching * driver. @@ -451,7 +460,7 @@ int register_virtio_device(struct virtio_device *dev) out_of_node_put: of_node_put(dev->dev.of_node); out_ida_remove: - ida_simple_remove(&virtio_index_ida, dev->index); + ida_free(&virtio_index_ida, dev->index); out: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; @@ -469,7 +478,7 @@ void unregister_virtio_device(struct virtio_device *dev) int index = dev->index; /* save for after device release */ device_unregister(&dev->dev); - ida_simple_remove(&virtio_index_ida, index); + ida_free(&virtio_index_ida, index); } EXPORT_SYMBOL_GPL(unregister_virtio_device); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c9699a59f93c..f9a36bc7ac27 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -253,6 +253,11 @@ static void vm_set_status(struct virtio_device *vdev, u8 status) /* We should never be setting status to 0. */ BUG_ON(status == 0); + /* + * Per memory-barriers.txt, wmb() is not needed to guarantee + * that the the cache coherent memory writes have completed + * before writing to the MMIO region. + */ writel(status, vm_dev->base + VIRTIO_MMIO_STATUS); } diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index 4093f9cca7a6..a0fa14f28a7f 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -467,6 +467,11 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev, { struct virtio_pci_common_cfg __iomem *cfg = mdev->common; + /* + * Per memory-barriers.txt, wmb() is not needed to guarantee + * that the the cache coherent memory writes have completed + * before writing to the MMIO region. + */ vp_iowrite8(status, &cfg->device_status); } EXPORT_SYMBOL_GPL(vp_modern_set_status); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 9c231e1fded7..13a7348cedff 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1688,7 +1688,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->we_own_ring = true; vq->notify = notify; vq->weak_barriers = weak_barriers; - vq->broken = false; + vq->broken = true; vq->last_used_idx = 0; vq->event_triggered = false; vq->num_added = 0; @@ -2134,8 +2134,11 @@ irqreturn_t vring_interrupt(int irq, void *_vq) return IRQ_NONE; } - if (unlikely(vq->broken)) - return IRQ_HANDLED; + if (unlikely(vq->broken)) { + dev_warn_once(&vq->vq.vdev->dev, + "virtio vring IRQ raised before DRIVER_OK"); + return IRQ_NONE; + } /* Just a hint for performance: so it's ok that this can be racy! */ if (vq->event) @@ -2177,7 +2180,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->we_own_ring = false; vq->notify = notify; vq->weak_barriers = weak_barriers; - vq->broken = false; + vq->broken = true; vq->last_used_idx = 0; vq->event_triggered = false; vq->num_added = 0; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 25be018810a7..9a36051ceb76 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -255,7 +255,27 @@ void virtio_device_ready(struct virtio_device *dev) { unsigned status = dev->config->get_status(dev); - BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); + WARN_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); + + /* + * The virtio_synchronize_cbs() makes sure vring_interrupt() + * will see the driver specific setup if it sees vq->broken + * as false (even if the notifications come before DRIVER_OK). + */ + virtio_synchronize_cbs(dev); + __virtio_unbreak_device(dev); + /* + * The transport should ensure the visibility of vq->broken + * before setting DRIVER_OK. See the comments for the transport + * specific set_status() method. + * + * A well behaved device will only notify a virtqueue after + * DRIVER_OK, this means the device should "see" the coherenct + * memory write that set vq->broken as false which is done by + * the driver when it sees DRIVER_OK, then the following + * driver's vring_interrupt() will see vq->broken as false so + * we won't lose any notification. + */ dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK); }