From 939779f5152d161b34f612af29e7dc1ac4472fcf Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Oct 2021 10:21:04 +0800 Subject: [PATCH 01/12] virtio_ring: validate used buffer length This patch validate the used buffer length provided by the device before trying to use it. This is done by record the in buffer length in a new field in desc_state structure during virtqueue_add(), then we can fail the virtqueue_get_buf() when we find the device is trying to give us a used buffer length which is greater than the in buffer length. Since some drivers have already done the validation by themselves, this patch tries to makes the core validation optional. For the driver that doesn't want the validation, it can set the suppress_used_validation to be true (which could be overridden by force_used_validation module parameter). To be more efficient, a dedicate array is used for storing the validate used length, this helps to eliminate the cache stress if validation is done by the driver. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211027022107.14357-2-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 60 ++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 2 ++ 2 files changed, 62 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6d2614e34470..00f64f2f8b72 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -14,6 +14,9 @@ #include #include +static bool force_used_validation = false; +module_param(force_used_validation, bool, 0444); + #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ #define BAD_RING(_vq, fmt, args...) \ @@ -182,6 +185,9 @@ struct vring_virtqueue { } packed; }; + /* Per-descriptor in buffer length */ + u32 *buflen; + /* How to notify other side. FIXME: commonalize hcalls! */ bool (*notify)(struct virtqueue *vq); @@ -490,6 +496,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, unsigned int i, n, avail, descs_used, prev, err_idx; int head; bool indirect; + u32 buflen = 0; START_USE(vq); @@ -571,6 +578,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE, indirect); + buflen += sg->length; } } /* Last one doesn't continue. */ @@ -610,6 +618,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, else vq->split.desc_state[head].indir_desc = ctx; + /* Store in buffer length if necessary */ + if (vq->buflen) + vq->buflen[head] = buflen; + /* Put entry in available array (but don't update avail->idx until they * do sync). */ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); @@ -784,6 +796,11 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } + if (vq->buflen && unlikely(*len > vq->buflen[i])) { + BAD_RING(vq, "used len %d is larger than in buflen %u\n", + *len, vq->buflen[i]); + return NULL; + } /* detach_buf_split clears data, so grab it now. */ ret = vq->split.desc_state[i].data; @@ -1062,6 +1079,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, unsigned int i, n, err_idx; u16 head, id; dma_addr_t addr; + u32 buflen = 0; head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); @@ -1091,6 +1109,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(sg->length); i++; + if (n >= out_sgs) + buflen += sg->length; } } @@ -1144,6 +1164,10 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, vq->packed.desc_state[id].indir_desc = desc; vq->packed.desc_state[id].last = id; + /* Store in buffer length if necessary */ + if (vq->buflen) + vq->buflen[id] = buflen; + vq->num_added += 1; pr_debug("Added buffer head %i to %p\n", head, vq); @@ -1179,6 +1203,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, __le16 head_flags, flags; u16 head, id, prev, curr, avail_used_flags; int err; + u32 buflen = 0; START_USE(vq); @@ -1258,6 +1283,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, 1 << VRING_PACKED_DESC_F_AVAIL | 1 << VRING_PACKED_DESC_F_USED; } + if (n >= out_sgs) + buflen += sg->length; } } @@ -1277,6 +1304,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, vq->packed.desc_state[id].indir_desc = ctx; vq->packed.desc_state[id].last = prev; + /* Store in buffer length if necessary */ + if (vq->buflen) + vq->buflen[id] = buflen; + /* * A driver MUST NOT make the first descriptor in the list * available before all subsequent descriptors comprising @@ -1463,6 +1494,11 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, BAD_RING(vq, "id %u is not a head!\n", id); return NULL; } + if (vq->buflen && unlikely(*len > vq->buflen[id])) { + BAD_RING(vq, "used len %d is larger than in buflen %u\n", + *len, vq->buflen[id]); + return NULL; + } /* detach_buf_packed clears data, so grab it now. */ ret = vq->packed.desc_state[id].data; @@ -1668,6 +1704,7 @@ static struct virtqueue *vring_create_virtqueue_packed( struct vring_virtqueue *vq; struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; size_t ring_size_in_bytes, event_size_in_bytes; @@ -1757,6 +1794,15 @@ static struct virtqueue *vring_create_virtqueue_packed( if (!vq->packed.desc_extra) goto err_desc_extra; + if (!drv->suppress_used_validation || force_used_validation) { + vq->buflen = kmalloc_array(num, sizeof(*vq->buflen), + GFP_KERNEL); + if (!vq->buflen) + goto err_buflen; + } else { + vq->buflen = NULL; + } + /* No callback? Tell other side not to bother us. */ if (!callback) { vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; @@ -1769,6 +1815,8 @@ static struct virtqueue *vring_create_virtqueue_packed( spin_unlock(&vdev->vqs_list_lock); return &vq->vq; +err_buflen: + kfree(vq->packed.desc_extra); err_desc_extra: kfree(vq->packed.desc_state); err_desc_state: @@ -2176,6 +2224,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, void (*callback)(struct virtqueue *), const char *name) { + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); struct vring_virtqueue *vq; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) @@ -2235,6 +2284,15 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, if (!vq->split.desc_extra) goto err_extra; + if (!drv->suppress_used_validation || force_used_validation) { + vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen), + GFP_KERNEL); + if (!vq->buflen) + goto err_buflen; + } else { + vq->buflen = NULL; + } + /* Put everything in free lists. */ vq->free_head = 0; memset(vq->split.desc_state, 0, vring.num * @@ -2245,6 +2303,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, spin_unlock(&vdev->vqs_list_lock); return &vq->vq; +err_buflen: + kfree(vq->split.desc_extra); err_extra: kfree(vq->split.desc_state); err_state: diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 41edbc01ffa4..44d0e09da2d9 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -152,6 +152,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev); * @feature_table_size: number of entries in the feature table array. * @feature_table_legacy: same as feature_table but when working in legacy mode. * @feature_table_size_legacy: number of entries in feature table legacy array. + * @suppress_used_validation: set to not have core validate used length * @probe: the function to call when a device is found. Returns 0 or -errno. * @scan: optional function to call after successful probe; intended * for virtio-scsi to invoke a scan. @@ -168,6 +169,7 @@ struct virtio_driver { unsigned int feature_table_size; const unsigned int *feature_table_legacy; unsigned int feature_table_size_legacy; + bool suppress_used_validation; int (*validate)(struct virtio_device *dev); int (*probe)(struct virtio_device *dev); void (*scan)(struct virtio_device *dev); From 816625c13652cef5b2c49082d652875da6f2ad7a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Oct 2021 10:21:05 +0800 Subject: [PATCH 02/12] virtio-net: don't let virtio core to validate used length For RX virtuqueue, the used length is validated in all the three paths (big, small and mergeable). For control vq, we never tries to use used length. So this patch forbids the core to validate the used length. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211027022107.14357-3-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 6d8c8745bf24..7c43bfc1ce44 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3385,6 +3385,7 @@ static struct virtio_driver virtio_net_driver = { .feature_table_size = ARRAY_SIZE(features), .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), + .suppress_used_validation = true, .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, From a40392edf1b2c7822bc0ce68413106661a9d4232 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Oct 2021 10:21:06 +0800 Subject: [PATCH 03/12] virtio-blk: don't let virtio core to validate used length We never tries to use used length, so the patch prevents the virtio core from validating used length. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211027022107.14357-4-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index ec6ed24d1061..e1f4f63ed246 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1041,6 +1041,7 @@ static struct virtio_driver virtio_blk = { .feature_table_size = ARRAY_SIZE(features), .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), + .suppress_used_validation = true, .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, From c57911ebfbfe745cb95da2bcf547c5bae000590f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Oct 2021 10:21:07 +0800 Subject: [PATCH 04/12] virtio-scsi: don't let virtio core to validate used buffer length We never tries to use used length, so the patch prevents the virtio core from validating used length. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211027022107.14357-5-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/scsi/virtio_scsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 07d0250f17c3..03b09ecea42d 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -977,6 +977,7 @@ static unsigned int features[] = { static struct virtio_driver virtio_scsi_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), + .suppress_used_validation = true, .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, From 6dbb1f1687a2ccdfc5b84b0a35bbc6dfefc4de3b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Oct 2021 20:55:12 +0300 Subject: [PATCH 05/12] vdpa: Introduce and use vdpa device get, set config helpers Subsequent patches enable get and set configuration either via management device or via vdpa device' config ops. This requires synchronization between multiple callers to get and set config callbacks. Features setting also influence the layout of the configuration fields endianness. To avoid exposing synchronization primitives to callers, introduce helper for setting the configuration and use it. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20211026175519.87795-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/vhost/vdpa.c | 3 +-- drivers/virtio/virtio_vdpa.c | 3 +-- include/linux/vdpa.h | 19 ++++--------------- 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index fcf02a364878..cbc8fc69cf9b 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -297,6 +297,42 @@ void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev) } EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister); +/** + * vdpa_get_config - Get one or more device configuration fields. + * @vdev: vdpa device to operate on + * @offset: starting byte offset of the field + * @buf: buffer pointer to read to + * @len: length of the configuration fields in bytes + */ +void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, + void *buf, unsigned int len) +{ + const struct vdpa_config_ops *ops = vdev->config; + + /* + * Config accesses aren't supposed to trigger before features are set. + * If it does happen we assume a legacy guest. + */ + if (!vdev->features_valid) + vdpa_set_features(vdev, 0); + ops->get_config(vdev, offset, buf, len); +} +EXPORT_SYMBOL_GPL(vdpa_get_config); + +/** + * vdpa_set_config - Set one or more device configuration fields. + * @vdev: vdpa device to operate on + * @offset: starting byte offset of the field + * @buf: buffer pointer to read from + * @length: length of the configuration fields in bytes + */ +void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, + const void *buf, unsigned int length) +{ + vdev->config->set_config(vdev, offset, buf, length); +} +EXPORT_SYMBOL_GPL(vdpa_set_config); + static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev, const char *busname, const char *devname) { diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 39039e046117..01c59ce7e250 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -237,7 +237,6 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, struct vhost_vdpa_config __user *c) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa_config config; unsigned long size = offsetof(struct vhost_vdpa_config, buf); u8 *buf; @@ -251,7 +250,7 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, if (IS_ERR(buf)) return PTR_ERR(buf); - ops->set_config(vdpa, config.off, buf, config.len); + vdpa_set_config(vdpa, config.off, buf, config.len); kvfree(buf); return 0; diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 6b62aaf08cc5..f85f860bc10b 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -65,9 +65,8 @@ static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; - ops->set_config(vdpa, offset, buf, len); + vdpa_set_config(vdpa, offset, buf, len); } static u32 virtio_vdpa_generation(struct virtio_device *vdev) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 30864848950b..267236aab34c 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -386,21 +386,10 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) return ops->set_features(vdev, features); } -static inline void vdpa_get_config(struct vdpa_device *vdev, - unsigned int offset, void *buf, - unsigned int len) -{ - const struct vdpa_config_ops *ops = vdev->config; - - /* - * Config accesses aren't supposed to trigger before features are set. - * If it does happen we assume a legacy guest. - */ - if (!vdev->features_valid) - vdpa_set_features(vdev, 0); - ops->get_config(vdev, offset, buf, len); -} - +void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, + void *buf, unsigned int len); +void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, + const void *buf, unsigned int length); /** * struct vdpa_mgmtdev_ops - vdpa device ops * @dev_add: Add a vdpa device using alloc and register From ad69dd0bf26b88ec6ab26f8bbe5cd74fbed7672a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Oct 2021 20:55:13 +0300 Subject: [PATCH 06/12] vdpa: Introduce query of device config layout Introduce a command to query a device config layout. An example query of network vdpa device: $ vdpa dev add name bar mgmtdev vdpasim_net $ vdpa dev config show bar: mac 00:35:09:19:48:05 link up link_announce false mtu 1500 $ vdpa dev config show -jp { "config": { "bar": { "mac": "00:35:09:19:48:05", "link ": "up", "link_announce ": false, "mtu": 1500, } } } Signed-off-by: Parav Pandit Signed-off-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20211026175519.87795-3-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 176 ++++++++++++++++++++++++++++++++++++++ include/linux/vdpa.h | 2 + include/uapi/linux/vdpa.h | 6 ++ 3 files changed, 184 insertions(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index cbc8fc69cf9b..8fcbdda8590c 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include static LIST_HEAD(mdev_head); /* A global mutex that protects vdpa management device and device level operations. */ @@ -66,6 +68,7 @@ static void vdpa_release_dev(struct device *d) ops->free(vdev); ida_simple_remove(&vdpa_index_ida, vdev->index); + mutex_destroy(&vdev->cf_mutex); kfree(vdev); } @@ -127,6 +130,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, if (err) goto err_name; + mutex_init(&vdev->cf_mutex); device_initialize(&vdev->dev); return vdev; @@ -309,6 +313,7 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, { const struct vdpa_config_ops *ops = vdev->config; + mutex_lock(&vdev->cf_mutex); /* * Config accesses aren't supposed to trigger before features are set. * If it does happen we assume a legacy guest. @@ -316,6 +321,7 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, if (!vdev->features_valid) vdpa_set_features(vdev, 0); ops->get_config(vdev, offset, buf, len); + mutex_unlock(&vdev->cf_mutex); } EXPORT_SYMBOL_GPL(vdpa_get_config); @@ -329,7 +335,9 @@ EXPORT_SYMBOL_GPL(vdpa_get_config); void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, unsigned int length) { + mutex_lock(&vdev->cf_mutex); vdev->config->set_config(vdev, offset, buf, length); + mutex_unlock(&vdev->cf_mutex); } EXPORT_SYMBOL_GPL(vdpa_set_config); @@ -661,6 +669,168 @@ static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callba return msg->len; } +static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev, + struct sk_buff *msg, u64 features, + const struct virtio_net_config *config) +{ + u16 val_u16; + + if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0) + return 0; + + val_u16 = le16_to_cpu(config->max_virtqueue_pairs); + return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16); +} + +static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) +{ + struct virtio_net_config config = {}; + u64 features; + u16 val_u16; + + vdpa_get_config(vdev, 0, &config, sizeof(config)); + + if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac), + config.mac)) + return -EMSGSIZE; + + val_u16 = le16_to_cpu(config.status); + if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16)) + return -EMSGSIZE; + + val_u16 = le16_to_cpu(config.mtu); + if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16)) + return -EMSGSIZE; + + features = vdev->config->get_features(vdev); + + return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config); +} + +static int +vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq, + int flags, struct netlink_ext_ack *extack) +{ + u32 device_id; + void *hdr; + int err; + + hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, + VDPA_CMD_DEV_CONFIG_GET); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { + err = -EMSGSIZE; + goto msg_err; + } + + device_id = vdev->config->get_device_id(vdev); + if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) { + err = -EMSGSIZE; + goto msg_err; + } + + switch (device_id) { + case VIRTIO_ID_NET: + err = vdpa_dev_net_config_fill(vdev, msg); + break; + default: + err = -EOPNOTSUPP; + break; + } + if (err) + goto msg_err; + + genlmsg_end(msg, hdr); + return 0; + +msg_err: + genlmsg_cancel(msg, hdr); + return err; +} + +static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct vdpa_device *vdev; + struct sk_buff *msg; + const char *devname; + struct device *dev; + int err; + + if (!info->attrs[VDPA_ATTR_DEV_NAME]) + return -EINVAL; + devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&vdpa_dev_mutex); + dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); + if (!dev) { + NL_SET_ERR_MSG_MOD(info->extack, "device not found"); + err = -ENODEV; + goto dev_err; + } + vdev = container_of(dev, struct vdpa_device, dev); + if (!vdev->mdev) { + NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); + err = -EINVAL; + goto mdev_err; + } + err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq, + 0, info->extack); + if (!err) + err = genlmsg_reply(msg, info); + +mdev_err: + put_device(dev); +dev_err: + mutex_unlock(&vdpa_dev_mutex); + if (err) + nlmsg_free(msg); + return err; +} + +static int vdpa_dev_config_dump(struct device *dev, void *data) +{ + struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); + struct vdpa_dev_dump_info *info = data; + int err; + + if (!vdev->mdev) + return 0; + if (info->idx < info->start_idx) { + info->idx++; + return 0; + } + err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid, + info->cb->nlh->nlmsg_seq, NLM_F_MULTI, + info->cb->extack); + if (err) + return err; + + info->idx++; + return 0; +} + +static int +vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct vdpa_dev_dump_info info; + + info.msg = msg; + info.cb = cb; + info.start_idx = cb->args[0]; + info.idx = 0; + + mutex_lock(&vdpa_dev_mutex); + bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump); + mutex_unlock(&vdpa_dev_mutex); + cb->args[0] = info.idx; + return msg->len; +} + static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING }, [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, @@ -692,6 +862,12 @@ static const struct genl_ops vdpa_nl_ops[] = { .doit = vdpa_nl_cmd_dev_get_doit, .dumpit = vdpa_nl_cmd_dev_get_dumpit, }, + { + .cmd = VDPA_CMD_DEV_CONFIG_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = vdpa_nl_cmd_dev_config_get_doit, + .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, + }, }; static struct genl_family vdpa_nl_family __ro_after_init = { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 267236aab34c..5cc5e501397f 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -63,6 +63,7 @@ struct vdpa_mgmt_dev; * @dev: underlying device * @dma_dev: the actual device that is performing DMA * @config: the configuration ops for this device. + * @cf_mutex: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized? for legacy guests * @use_va: indicate whether virtual address must be used by this device @@ -74,6 +75,7 @@ struct vdpa_device { struct device dev; struct device *dma_dev; const struct vdpa_config_ops *config; + struct mutex cf_mutex; /* Protects get/set config */ unsigned int index; bool features_valid; bool use_va; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index e3b87879514c..a252f06f9dfd 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -17,6 +17,7 @@ enum vdpa_command { VDPA_CMD_DEV_NEW, VDPA_CMD_DEV_DEL, VDPA_CMD_DEV_GET, /* can dump */ + VDPA_CMD_DEV_CONFIG_GET, /* can dump */ }; enum vdpa_attr { @@ -34,6 +35,11 @@ enum vdpa_attr { VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */ VDPA_ATTR_DEV_MIN_VQ_SIZE, /* u16 */ + VDPA_ATTR_DEV_NET_CFG_MACADDR, /* binary */ + VDPA_ATTR_DEV_NET_STATUS, /* u8 */ + VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */ + VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; From 960deb33be3d08e55a39e40e0286a51c7448e053 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Oct 2021 20:55:14 +0300 Subject: [PATCH 07/12] vdpa: Use kernel coding style for structure comments As subsequent patch adds new structure field with comment, move the structure comment to follow kernel coding style. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20211026175519.87795-4-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 5cc5e501397f..fafb7202482c 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -411,10 +411,17 @@ struct vdpa_mgmtdev_ops { void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); }; +/** + * struct vdpa_mgmt_dev - vdpa management device + * @device: Management parent device + * @ops: operations supported by management device + * @id_table: Pointer to device id table of supported ids + * @list: list entry + */ struct vdpa_mgmt_dev { struct device *device; const struct vdpa_mgmtdev_ops *ops; - const struct virtio_device_id *id_table; /* supported ids */ + const struct virtio_device_id *id_table; struct list_head list; }; From d8ca2fa5be1bdb9d08cfe1f831cddb622a01dfd4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Oct 2021 20:55:15 +0300 Subject: [PATCH 08/12] vdpa: Enable user to set mac and mtu of vdpa device $ vdpa dev add name bar mgmtdev vdpasim_net mac 00:11:22:33:44:55 mtu 9000 $ vdpa dev config show bar: mac 00:11:22:33:44:55 link up link_announce false mtu 9000 $ vdpa dev config show -jp { "config": { "bar": { "mac": "00:11:22:33:44:55", "link ": "up", "link_announce ": false, "mtu": 9000, } } } Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20211026175519.87795-5-parav@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vdpa/ifcvf/ifcvf_main.c | 3 ++- drivers/vdpa/mlx5/net/mlx5_vnet.c | 3 ++- drivers/vdpa/vdpa.c | 38 ++++++++++++++++++++++++++-- drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 3 ++- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 3 ++- drivers/vdpa/vdpa_user/vduse_dev.c | 3 ++- include/linux/vdpa.h | 17 ++++++++++++- 7 files changed, 62 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index dcd648e1f7e7..6dc75ca70b37 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -499,7 +499,8 @@ static u32 get_dev_type(struct pci_dev *pdev) return dev_type; } -static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) { struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; struct ifcvf_adapter *adapter; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index b5bd1a553256..6bbdc0ece707 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2482,7 +2482,8 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p return ret; } -static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) +static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, + const struct vdpa_dev_set_config *add_config) { struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct virtio_net_config *config; diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 8fcbdda8590c..7332a74a4b00 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -14,7 +14,6 @@ #include #include #include -#include #include static LIST_HEAD(mdev_head); @@ -480,9 +479,15 @@ out: return msg->len; } +#define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \ + (1 << VDPA_ATTR_DEV_NET_CFG_MTU)) + static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info) { + struct vdpa_dev_set_config config = {}; + struct nlattr **nl_attrs = info->attrs; struct vdpa_mgmt_dev *mdev; + const u8 *macaddr; const char *name; int err = 0; @@ -491,6 +496,26 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); + if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { + macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); + memcpy(config.net.mac, macaddr, sizeof(config.net.mac)); + config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR); + } + if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) { + config.net.mtu = + nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]); + config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU); + } + + /* Skip checking capability if user didn't prefer to configure any + * device networking attributes. It is likely that user might have used + * a device specific method to configure such attributes or using device + * default attributes. + */ + if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) && + !netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + mutex_lock(&vdpa_dev_mutex); mdev = vdpa_mgmtdev_get_from_attr(info->attrs); if (IS_ERR(mdev)) { @@ -498,8 +523,14 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i err = PTR_ERR(mdev); goto err; } + if ((config.mask & mdev->config_attr_mask) != config.mask) { + NL_SET_ERR_MSG_MOD(info->extack, + "All provided attributes are not supported"); + err = -EOPNOTSUPP; + goto err; + } - err = mdev->ops->dev_add(mdev, name); + err = mdev->ops->dev_add(mdev, name, &config); err: mutex_unlock(&vdpa_dev_mutex); return err; @@ -835,6 +866,9 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING }, [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING }, + [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, + /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ + [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), }; static const struct genl_ops vdpa_nl_ops[] = { diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index a790903f243e..42d401d43911 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -248,7 +248,8 @@ static struct device vdpasim_blk_mgmtdev = { .release = vdpasim_blk_mgmtdev_release, }; -static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) { struct vdpasim_dev_attr dev_attr = {}; struct vdpasim *simdev; diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index a1ab6163f7d1..d681e423e64f 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -126,7 +126,8 @@ static struct device vdpasim_net_mgmtdev = { .release = vdpasim_net_mgmtdev_release, }; -static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) { struct vdpasim_dev_attr dev_attr = {}; struct vdpasim *simdev; diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 841667a896dd..c9204c62f339 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1503,7 +1503,8 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) return 0; } -static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) { struct vduse_dev *dev; int ret; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index fafb7202482c..c3011ccda430 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include /** * struct vdpa_calllback - vDPA callback definition. @@ -93,6 +95,14 @@ struct vdpa_iova_range { u64 last; }; +struct vdpa_dev_set_config { + struct { + u8 mac[ETH_ALEN]; + u16 mtu; + } net; + u64 mask; +}; + /** * Corresponding file area for device memory mapping * @file: vma->vm_file for the mapping @@ -397,6 +407,7 @@ void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, * @dev_add: Add a vdpa device using alloc and register * @mdev: parent device to use for device addition * @name: name of the new vdpa device + * @config: config attributes to apply to the device under creation * Driver need to add a new device using _vdpa_register_device() * after fully initializing the vdpa device. Driver must return 0 * on success or appropriate error code. @@ -407,7 +418,8 @@ void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, * _vdpa_unregister_device(). */ struct vdpa_mgmtdev_ops { - int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name); + int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config); void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); }; @@ -416,12 +428,15 @@ struct vdpa_mgmtdev_ops { * @device: Management parent device * @ops: operations supported by management device * @id_table: Pointer to device id table of supported ids + * @config_attr_mask: bit mask of attributes of type enum vdpa_attr that + * management device support during dev_add callback * @list: list entry */ struct vdpa_mgmt_dev { struct device *device; const struct vdpa_mgmtdev_ops *ops; const struct virtio_device_id *id_table; + u64 config_attr_mask; struct list_head list; }; From 1138b9818efa8a3a9670680a1d75134a4f2758ce Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Oct 2021 20:55:16 +0300 Subject: [PATCH 09/12] vdpa_sim_net: Enable user to set mac address and mtu Enable user to set the mac address and mtu so that each vdpa device can have its own user specified mac address and mtu. Now that user is enabled to set the mac address, remove the module parameter for same. And example of setting mac addr and mtu and view the configuration: $ vdpa mgmtdev show vdpasim_net: supported_classes net $ vdpa dev add name bar mgmtdev vdpasim_net mac 00:11:22:33:44:55 mtu 9000 $ vdpa dev config show bar: mac 00:11:22:33:44:55 link up link_announce false mtu 9000 Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20211026175519.87795-6-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 35 +++++++++++++++------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index d681e423e64f..76dd24abc791 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "vdpa_sim.h" @@ -29,12 +30,6 @@ #define VDPASIM_NET_VQ_NUM 2 -static char *macaddr; -module_param(macaddr, charp, 0); -MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); - -static u8 macaddr_buf[ETH_ALEN]; - static void vdpasim_net_work(struct work_struct *work) { struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); @@ -112,9 +107,21 @@ static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) { struct virtio_net_config *net_config = config; - net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); - memcpy(net_config->mac, macaddr_buf, ETH_ALEN); +} + +static void vdpasim_net_setup_config(struct vdpasim *vdpasim, + const struct vdpa_dev_set_config *config) +{ + struct virtio_net_config *vio_config = vdpasim->config; + + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) + memcpy(vio_config->mac, config->net.mac, ETH_ALEN); + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU)) + vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu); + else + /* Setup default MTU to be 1500 */ + vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); } static void vdpasim_net_mgmtdev_release(struct device *dev) @@ -147,6 +154,8 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, if (IS_ERR(simdev)) return PTR_ERR(simdev); + vdpasim_net_setup_config(simdev, config); + ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); if (ret) goto reg_err; @@ -180,20 +189,14 @@ static struct vdpa_mgmt_dev mgmt_dev = { .device = &vdpasim_net_mgmtdev, .id_table = id_table, .ops = &vdpasim_net_mgmtdev_ops, + .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR | + 1 << VDPA_ATTR_DEV_NET_CFG_MTU), }; static int __init vdpasim_net_init(void) { int ret; - if (macaddr) { - mac_pton(macaddr, macaddr_buf); - if (!is_valid_ether_addr(macaddr_buf)) - return -EADDRNOTAVAIL; - } else { - eth_random_addr(macaddr_buf); - } - ret = device_register(&vdpasim_net_mgmtdev); if (ret) return ret; From ef76eb83a17e803a66b64ac95b36ae48b3d17c22 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Oct 2021 20:55:17 +0300 Subject: [PATCH 10/12] vdpa/mlx5: Fix clearing of VIRTIO_NET_F_MAC feature bit Cited patch in the fixes tag clears the features bit during reset. mlx5 vdpa device feature bits are static decided by device capabilities. These feature bits (including VIRTIO_NET_F_MAC) are initialized during device addition time. Clearing features bit in reset callback cleared the VIRTIO_NET_F_MAC. Due to this, MAC address provided by the device is not honored. Fix it by not clearing the static feature bits during reset. Fixes: 0686082dbf7a ("vdpa: Add reset callback in vdpa_config_ops") Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Link: https://lore.kernel.org/r/20211026175519.87795-7-parav@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 6bbdc0ece707..8d1539728a59 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2194,7 +2194,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) clear_vqs_ready(ndev); mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status = 0; - ndev->mvdev.mlx_features = 0; memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs)); ndev->mvdev.actual_features = 0; ++mvdev->generation; From a007d940040c0b3d1fcb5f39159c2b141b85eae0 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 26 Oct 2021 20:55:18 +0300 Subject: [PATCH 11/12] vdpa/mlx5: Support configuration of MAC Add code to accept MAC configuration through vdpa tool. The MAC is written into the config struct and later can be retrieved through get_config(). Examples: 1. Configure MAC while adding a device: $ vdpa dev add mgmtdev pci/0000:06:00.2 name vdpa0 mac 00:11:22:33:44:55 2. Show configured params: $ vdpa dev config show vdpa0: mac 00:11:22:33:44:55 link down link_announce false max_vq_pairs 8 mtu 1500 Signed-off-by: Eli Cohen Reviewed-by: Parav Pandit Acked-by: Jason Wang Link: https://lore.kernel.org/r/20211026175519.87795-8-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 8d1539728a59..cc027499c4d3 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -2524,17 +2525,20 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, goto err_mtu; ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); - ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); - - err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); - if (err) - goto err_mtu; if (get_link_state(mvdev)) ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); else ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); + if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { + memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN); + } else { + err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); + if (err) + goto err_mtu; + } + if (!is_zero_ether_addr(config->mac)) { pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); err = mlx5_mpfs_add_mac(pfmdev, config->mac); @@ -2632,6 +2636,7 @@ static int mlx5v_probe(struct auxiliary_device *adev, mgtdev->mgtdev.ops = &mdev_ops; mgtdev->mgtdev.device = mdev->device; mgtdev->mgtdev.id_table = id_table; + mgtdev->mgtdev.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR); mgtdev->madev = madev; err = vdpa_mgmtdev_register(&mgtdev->mgtdev); From 540061ac79f0302ae91e44e6cd216cbaa3af1757 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 26 Oct 2021 20:55:19 +0300 Subject: [PATCH 12/12] vdpa/mlx5: Forward only packets with allowed MAC address Add rules to forward packets to the net device's TIR only if the destination MAC is equal to the configured MAC. This is required to prevent the netdevice from receiving traffic not destined to its configured MAC. Signed-off-by: Eli Cohen Reviewed-by: Parav Pandit Link: https://lore.kernel.org/r/20211026175519.87795-9-parav@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 76 +++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index cc027499c4d3..9b7d8c721354 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -158,7 +158,8 @@ struct mlx5_vdpa_net { struct mutex reslock; struct mlx5_flow_table *rxft; struct mlx5_fc *rx_counter; - struct mlx5_flow_handle *rx_rule; + struct mlx5_flow_handle *rx_rule_ucast; + struct mlx5_flow_handle *rx_rule_mcast; bool setup; u32 cur_num_vqs; struct notifier_block nb; @@ -1383,21 +1384,33 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_namespace *ns; + struct mlx5_flow_spec *spec; + void *headers_c; + void *headers_v; + u8 *dmac_c; + u8 *dmac_v; int err; - /* for now, one entry, match all, forward to tir */ - ft_attr.max_fte = 1; - ft_attr.autogroup.max_num_groups = 1; + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); if (!ns) { - mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n"); - return -EOPNOTSUPP; + mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); + err = -EOPNOTSUPP; + goto err_ns; } ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(ndev->rxft)) - return PTR_ERR(ndev->rxft); + if (IS_ERR(ndev->rxft)) { + err = PTR_ERR(ndev->rxft); + goto err_ns; + } ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false); if (IS_ERR(ndev->rx_counter)) { @@ -1405,37 +1418,64 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) goto err_fc; } + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); + memset(dmac_c, 0xff, ETH_ALEN); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); + ether_addr_copy(dmac_v, ndev->config.mac); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest[0].tir_num = ndev->res.tirn; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter_id = mlx5_fc_id(ndev->rx_counter); - ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2); - if (IS_ERR(ndev->rx_rule)) { - err = PTR_ERR(ndev->rx_rule); - ndev->rx_rule = NULL; - goto err_rule; + ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2); + + if (IS_ERR(ndev->rx_rule_ucast)) { + err = PTR_ERR(ndev->rx_rule_ucast); + ndev->rx_rule_ucast = NULL; + goto err_rule_ucast; } + memset(dmac_c, 0, ETH_ALEN); + memset(dmac_v, 0, ETH_ALEN); + dmac_c[0] = 1; + dmac_v[0] = 1; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1); + if (IS_ERR(ndev->rx_rule_mcast)) { + err = PTR_ERR(ndev->rx_rule_mcast); + ndev->rx_rule_mcast = NULL; + goto err_rule_mcast; + } + + kvfree(spec); return 0; -err_rule: +err_rule_mcast: + mlx5_del_flow_rules(ndev->rx_rule_ucast); + ndev->rx_rule_ucast = NULL; +err_rule_ucast: mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); err_fc: mlx5_destroy_flow_table(ndev->rxft); +err_ns: + kvfree(spec); return err; } static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) { - if (!ndev->rx_rule) + if (!ndev->rx_rule_ucast) return; - mlx5_del_flow_rules(ndev->rx_rule); + mlx5_del_flow_rules(ndev->rx_rule_mcast); + ndev->rx_rule_mcast = NULL; + mlx5_del_flow_rules(ndev->rx_rule_ucast); + ndev->rx_rule_ucast = NULL; mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); mlx5_destroy_flow_table(ndev->rxft); - - ndev->rx_rule = NULL; } static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)