virtio: fixes, features

IRQ bypass support for vdpa and IFC
 MLX5 vdpa driver
 Endian-ness fixes for virtio drivers
 Misc other fixes
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAl8yVEwPHG1zdEByZWRo
 YXQuY29tAAoJECgfDbjSjVRpNPEH/0Dtq1s1V4r/kxtLUoMophv9wuORpWCr98BQ
 2aOveTmwTOVdZVOiw2tzTgO9nbWx+cL2HvkU7Aajfpz5hh93Z2VOo2n4a7hBC79f
 rlc3GXiG+pMk5RfmqGofIHTU+D6ony4D5SXlUDurLdtEwunyuqZwABiWkZjdclZJ
 bv90IL8Upzbz0rxYr7k3z8UepdOCt7r4QS/o7STHZBjJRyylxmO/R2yTnh6PtpRK
 Q/z35wJBJ3SKc8X3Fi0VOOSeGNZOiypkkl9ZnLVY5lExNAU1+2MMn2UK119SlCDV
 MSxb7quYFF4cksXH1g77GMBNi1uADRh1dtFMZdkKhZGljGxKLxo=
 =6VTZ
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

 - IRQ bypass support for vdpa and IFC

 - MLX5 vdpa driver

 - Endianness fixes for virtio drivers

 - Misc other fixes

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (71 commits)
  vdpa/mlx5: fix up endian-ness for mtu
  vdpa: Fix pointer math bug in vdpasim_get_config()
  vdpa/mlx5: Fix pointer math in mlx5_vdpa_get_config()
  vdpa/mlx5: fix memory allocation failure checks
  vdpa/mlx5: Fix uninitialised variable in core/mr.c
  vdpa_sim: init iommu lock
  virtio_config: fix up warnings on parisc
  vdpa/mlx5: Add VDPA driver for supported mlx5 devices
  vdpa/mlx5: Add shared memory registration code
  vdpa/mlx5: Add support library for mlx5 VDPA implementation
  vdpa/mlx5: Add hardware descriptive header file
  vdpa: Modify get_vq_state() to return error code
  net/vdpa: Use struct for set/get vq state
  vdpa: remove hard coded virtq num
  vdpasim: support batch updating
  vhost-vdpa: support IOTLB batching hints
  vhost-vdpa: support get/set backend features
  vhost: generialize backend features setting/getting
  vhost-vdpa: refine ioctl pre-processing
  vDPA: dont change vq irq after DRIVER_OK
  ...
This commit is contained in:
Linus Torvalds 2020-08-11 14:34:17 -07:00
commit 57b0779392
60 changed files with 3888 additions and 410 deletions

View file

@ -385,7 +385,7 @@ static irqreturn_t vu_req_interrupt(int irq, void *data)
}
break;
case VHOST_USER_SLAVE_IOTLB_MSG:
/* not supported - VIRTIO_F_IOMMU_PLATFORM */
/* not supported - VIRTIO_F_ACCESS_PLATFORM */
case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
default:

View file

@ -10667,11 +10667,17 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
int ret;
irqfd->producer = prod;
kvm_arch_start_assignment(irqfd->kvm);
ret = kvm_x86_ops.update_pi_irte(irqfd->kvm,
prod->irq, irqfd->gsi, 1);
return kvm_x86_ops.update_pi_irte(irqfd->kvm,
prod->irq, irqfd->gsi, 1);
if (ret)
kvm_arch_end_assignment(irqfd->kvm);
return ret;
}
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
@ -10694,6 +10700,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
if (ret)
printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
" fails: %d\n", irqfd->consumer.token, ret);
kvm_arch_end_assignment(irqfd->kvm);
}
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,

View file

@ -204,8 +204,8 @@ static int virtcrypto_update_status(struct virtio_crypto *vcrypto)
u32 status;
int err;
virtio_cread(vcrypto->vdev,
struct virtio_crypto_config, status, &status);
virtio_cread_le(vcrypto->vdev,
struct virtio_crypto_config, status, &status);
/*
* Unknown status bits would be a host error and the driver
@ -323,31 +323,31 @@ static int virtcrypto_probe(struct virtio_device *vdev)
if (!vcrypto)
return -ENOMEM;
virtio_cread(vdev, struct virtio_crypto_config,
virtio_cread_le(vdev, struct virtio_crypto_config,
max_dataqueues, &max_data_queues);
if (max_data_queues < 1)
max_data_queues = 1;
virtio_cread(vdev, struct virtio_crypto_config,
max_cipher_key_len, &max_cipher_key_len);
virtio_cread(vdev, struct virtio_crypto_config,
max_auth_key_len, &max_auth_key_len);
virtio_cread(vdev, struct virtio_crypto_config,
max_size, &max_size);
virtio_cread(vdev, struct virtio_crypto_config,
crypto_services, &crypto_services);
virtio_cread(vdev, struct virtio_crypto_config,
cipher_algo_l, &cipher_algo_l);
virtio_cread(vdev, struct virtio_crypto_config,
cipher_algo_h, &cipher_algo_h);
virtio_cread(vdev, struct virtio_crypto_config,
hash_algo, &hash_algo);
virtio_cread(vdev, struct virtio_crypto_config,
mac_algo_l, &mac_algo_l);
virtio_cread(vdev, struct virtio_crypto_config,
mac_algo_h, &mac_algo_h);
virtio_cread(vdev, struct virtio_crypto_config,
aead_algo, &aead_algo);
virtio_cread_le(vdev, struct virtio_crypto_config,
max_cipher_key_len, &max_cipher_key_len);
virtio_cread_le(vdev, struct virtio_crypto_config,
max_auth_key_len, &max_auth_key_len);
virtio_cread_le(vdev, struct virtio_crypto_config,
max_size, &max_size);
virtio_cread_le(vdev, struct virtio_crypto_config,
crypto_services, &crypto_services);
virtio_cread_le(vdev, struct virtio_crypto_config,
cipher_algo_l, &cipher_algo_l);
virtio_cread_le(vdev, struct virtio_crypto_config,
cipher_algo_h, &cipher_algo_h);
virtio_cread_le(vdev, struct virtio_crypto_config,
hash_algo, &hash_algo);
virtio_cread_le(vdev, struct virtio_crypto_config,
mac_algo_l, &mac_algo_l);
virtio_cread_le(vdev, struct virtio_crypto_config,
mac_algo_h, &mac_algo_h);
virtio_cread_le(vdev, struct virtio_crypto_config,
aead_algo, &aead_algo);
/* Add virtio crypto device to global table */
err = virtcrypto_devmgr_add_dev(vcrypto);

View file

@ -39,8 +39,8 @@ static void virtio_gpu_config_changed_work_func(struct work_struct *work)
u32 events_read, events_clear = 0;
/* read the config space */
virtio_cread(vgdev->vdev, struct virtio_gpu_config,
events_read, &events_read);
virtio_cread_le(vgdev->vdev, struct virtio_gpu_config,
events_read, &events_read);
if (events_read & VIRTIO_GPU_EVENT_DISPLAY) {
if (vgdev->has_edid)
virtio_gpu_cmd_get_edids(vgdev);
@ -49,8 +49,8 @@ static void virtio_gpu_config_changed_work_func(struct work_struct *work)
drm_helper_hpd_irq_event(vgdev->ddev);
events_clear |= VIRTIO_GPU_EVENT_DISPLAY;
}
virtio_cwrite(vgdev->vdev, struct virtio_gpu_config,
events_clear, &events_clear);
virtio_cwrite_le(vgdev->vdev, struct virtio_gpu_config,
events_clear, &events_clear);
}
static void virtio_gpu_init_vq(struct virtio_gpu_queue *vgvq,
@ -165,8 +165,8 @@ int virtio_gpu_init(struct drm_device *dev)
}
/* get display info */
virtio_cread(vgdev->vdev, struct virtio_gpu_config,
num_scanouts, &num_scanouts);
virtio_cread_le(vgdev->vdev, struct virtio_gpu_config,
num_scanouts, &num_scanouts);
vgdev->num_scanouts = min_t(uint32_t, num_scanouts,
VIRTIO_GPU_MAX_SCANOUTS);
if (!vgdev->num_scanouts) {
@ -176,8 +176,8 @@ int virtio_gpu_init(struct drm_device *dev)
}
DRM_INFO("number of scanouts: %d\n", num_scanouts);
virtio_cread(vgdev->vdev, struct virtio_gpu_config,
num_capsets, &num_capsets);
virtio_cread_le(vgdev->vdev, struct virtio_gpu_config,
num_capsets, &num_capsets);
DRM_INFO("number of cap sets: %d\n", num_capsets);
virtio_gpu_modeset_init(vgdev);

View file

@ -141,7 +141,7 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev,
struct virtio_gpu_mem_entry **ents,
unsigned int *nents)
{
bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev);
bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev);
struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo);
struct scatterlist *sg;
int si, ret;

View file

@ -599,7 +599,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev,
struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(objs->objs[0]);
struct virtio_gpu_transfer_to_host_2d *cmd_p;
struct virtio_gpu_vbuffer *vbuf;
bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev);
bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev);
struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo);
if (use_dma_api)
@ -1015,7 +1015,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev,
struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(objs->objs[0]);
struct virtio_gpu_transfer_host_3d *cmd_p;
struct virtio_gpu_vbuffer *vbuf;
bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev);
bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev);
struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo);
if (use_dma_api)

View file

@ -1010,8 +1010,8 @@ static int viommu_probe(struct virtio_device *vdev)
if (ret)
return ret;
virtio_cread(vdev, struct virtio_iommu_config, page_size_mask,
&viommu->pgsize_bitmap);
virtio_cread_le(vdev, struct virtio_iommu_config, page_size_mask,
&viommu->pgsize_bitmap);
if (!viommu->pgsize_bitmap) {
ret = -EINVAL;
@ -1022,25 +1022,25 @@ static int viommu_probe(struct virtio_device *vdev)
viommu->last_domain = ~0U;
/* Optional features */
virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
struct virtio_iommu_config, input_range.start,
&input_start);
virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
struct virtio_iommu_config, input_range.start,
&input_start);
virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
struct virtio_iommu_config, input_range.end,
&input_end);
virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
struct virtio_iommu_config, input_range.end,
&input_end);
virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
struct virtio_iommu_config, domain_range.start,
&viommu->first_domain);
virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
struct virtio_iommu_config, domain_range.start,
&viommu->first_domain);
virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
struct virtio_iommu_config, domain_range.end,
&viommu->last_domain);
virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
struct virtio_iommu_config, domain_range.end,
&viommu->last_domain);
virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
struct virtio_iommu_config, probe_size,
&viommu->probe_size);
virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_PROBE,
struct virtio_iommu_config, probe_size,
&viommu->probe_size);
viommu->geometry = (struct iommu_domain_geometry) {
.aperture_start = input_start,

View file

@ -2264,12 +2264,13 @@ static void virtnet_update_settings(struct virtnet_info *vi)
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
return;
speed = virtio_cread32(vi->vdev, offsetof(struct virtio_net_config,
speed));
virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
if (ethtool_validate_speed(speed))
vi->speed = speed;
duplex = virtio_cread8(vi->vdev, offsetof(struct virtio_net_config,
duplex));
virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
if (ethtool_validate_duplex(duplex))
vi->duplex = duplex;
}

View file

@ -58,9 +58,9 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
goto out_err;
}
virtio_cread(vpmem->vdev, struct virtio_pmem_config,
virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
start, &vpmem->start);
virtio_cread(vpmem->vdev, struct virtio_pmem_config,
virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
size, &vpmem->size);
res.start = vpmem->start;

View file

@ -625,7 +625,10 @@ static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
vdev_id = VIRTIO_ID_NET;
hdr_len = sizeof(struct virtio_net_hdr);
config = &fifo->vdev[vdev_id]->config.net;
if (ntohs(hdr.len) > config->mtu +
/* A legacy-only interface for now. */
if (ntohs(hdr.len) >
__virtio16_to_cpu(virtio_legacy_is_little_endian(),
config->mtu) +
MLXBF_TMFIFO_NET_L2_OVERHEAD)
return;
} else {
@ -1231,8 +1234,12 @@ static int mlxbf_tmfifo_probe(struct platform_device *pdev)
/* Create the network vdev. */
memset(&net_config, 0, sizeof(net_config));
net_config.mtu = ETH_DATA_LEN;
net_config.status = VIRTIO_NET_S_LINK_UP;
/* A legacy-only interface for now. */
net_config.mtu = __cpu_to_virtio16(virtio_legacy_is_little_endian(),
ETH_DATA_LEN);
net_config.status = __cpu_to_virtio16(virtio_legacy_is_little_endian(),
VIRTIO_NET_S_LINK_UP);
mlxbf_tmfifo_get_cfg_mac(net_config.mac);
rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_NET,
MLXBF_TMFIFO_NET_FEATURES, &net_config,

View file

@ -754,14 +754,14 @@ static struct scsi_host_template virtscsi_host_template = {
#define virtscsi_config_get(vdev, fld) \
({ \
typeof(((struct virtio_scsi_config *)0)->fld) __val; \
__virtio_native_type(struct virtio_scsi_config, fld) __val; \
virtio_cread(vdev, struct virtio_scsi_config, fld, &__val); \
__val; \
})
#define virtscsi_config_set(vdev, fld, val) \
do { \
typeof(((struct virtio_scsi_config *)0)->fld) __val = (val); \
__virtio_native_type(struct virtio_scsi_config, fld) __val = (val); \
virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \
} while(0)

View file

@ -29,4 +29,23 @@ config IFCVF
To compile this driver as a module, choose M here: the module will
be called ifcvf.
config MLX5_VDPA
bool "MLX5 VDPA support library for ConnectX devices"
depends on MLX5_CORE
default n
help
Support library for Mellanox VDPA drivers. Provides code that is
common for all types of VDPA drivers. The following drivers are planned:
net, block.
config MLX5_VDPA_NET
tristate "vDPA driver for ConnectX devices"
depends on MLX5_VDPA
default n
help
VDPA network driver for ConnectX6 and newer. Provides offloading
of virtio net datapath such that descriptors put on the ring will
be executed by the hardware. It also supports a variety of stateless
offloads depending on the actual device used and firmware version.
endif # VDPA

View file

@ -2,3 +2,4 @@
obj-$(CONFIG_VDPA) += vdpa.o
obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
obj-$(CONFIG_IFCVF) += ifcvf/
obj-$(CONFIG_MLX5_VDPA) += mlx5/

View file

@ -272,7 +272,7 @@ static int ifcvf_config_features(struct ifcvf_hw *hw)
return 0;
}
u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
{
struct ifcvf_lm_cfg __iomem *ifcvf_lm;
void __iomem *avail_idx_addr;
@ -287,7 +287,7 @@ u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
return last_avail_idx;
}
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num)
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num)
{
struct ifcvf_lm_cfg __iomem *ifcvf_lm;
void __iomem *avail_idx_addr;

View file

@ -29,7 +29,7 @@
(1ULL << VIRTIO_F_VERSION_1) | \
(1ULL << VIRTIO_NET_F_STATUS) | \
(1ULL << VIRTIO_F_ORDER_PLATFORM) | \
(1ULL << VIRTIO_F_IOMMU_PLATFORM) | \
(1ULL << VIRTIO_F_ACCESS_PLATFORM) | \
(1ULL << VIRTIO_NET_F_MRG_RXBUF))
/* Only one queue pair for now. */
@ -116,7 +116,7 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
void ifcvf_reset(struct ifcvf_hw *hw);
u64 ifcvf_get_features(struct ifcvf_hw *hw);
u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num);
u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num);
struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
#endif /* _IFCVF_H_ */

View file

@ -50,8 +50,10 @@ static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues)
int i;
for (i = 0; i < queues; i++)
for (i = 0; i < queues; i++) {
devm_free_irq(&pdev->dev, vf->vring[i].irq, &vf->vring[i]);
vf->vring[i].irq = -EINVAL;
}
ifcvf_free_irq_vectors(pdev);
}
@ -235,19 +237,21 @@ static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev)
return IFCVF_QUEUE_MAX;
}
static u64 ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid)
static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
struct vdpa_vq_state *state)
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
return ifcvf_get_vq_state(vf, qid);
state->avail_index = ifcvf_get_vq_state(vf, qid);
return 0;
}
static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
u64 num)
const struct vdpa_vq_state *state)
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
return ifcvf_set_vq_state(vf, qid, num);
return ifcvf_set_vq_state(vf, qid, state->avail_index);
}
static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid,
@ -352,6 +356,14 @@ static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
vf->config_cb.private = cb->private;
}
static int ifcvf_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev,
u16 qid)
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
return vf->vring[qid].irq;
}
/*
* IFCVF currently does't have on-chip IOMMU, so not
* implemented set_map()/dma_map()/dma_unmap()
@ -369,6 +381,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = {
.get_vq_ready = ifcvf_vdpa_get_vq_ready,
.set_vq_num = ifcvf_vdpa_set_vq_num,
.set_vq_address = ifcvf_vdpa_set_vq_address,
.get_vq_irq = ifcvf_vdpa_get_vq_irq,
.kick_vq = ifcvf_vdpa_kick_vq,
.get_generation = ifcvf_vdpa_get_generation,
.get_device_id = ifcvf_vdpa_get_device_id,
@ -384,7 +397,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
struct device *dev = &pdev->dev;
struct ifcvf_adapter *adapter;
struct ifcvf_hw *vf;
int ret;
int ret, i;
ret = pcim_enable_device(pdev);
if (ret) {
@ -420,7 +433,8 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
dev, &ifc_vdpa_ops);
dev, &ifc_vdpa_ops,
IFCVF_MAX_QUEUE_PAIRS * 2);
if (adapter == NULL) {
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
return -ENOMEM;
@ -441,6 +455,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err;
}
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
vf->vring[i].irq = -EINVAL;
ret = vdpa_register_device(&adapter->vdpa);
if (ret) {
IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");

View file

@ -0,0 +1,4 @@
subdir-ccflags-y += -I$(srctree)/drivers/vdpa/mlx5/core
obj-$(CONFIG_MLX5_VDPA_NET) += mlx5_vdpa.o
mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/main.o net/mlx5_vnet.o core/resources.o core/mr.o

View file

@ -0,0 +1,91 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
#ifndef __MLX5_VDPA_H__
#define __MLX5_VDPA_H__
#include <linux/vdpa.h>
#include <linux/mlx5/driver.h>
struct mlx5_vdpa_direct_mr {
u64 start;
u64 end;
u32 perm;
struct mlx5_core_mkey mr;
struct sg_table sg_head;
int log_size;
int nsg;
struct list_head list;
u64 offset;
};
struct mlx5_vdpa_mr {
struct mlx5_core_mkey mkey;
/* list of direct MRs descendants of this indirect mr */
struct list_head head;
unsigned long num_directs;
unsigned long num_klms;
bool initialized;
/* serialize mkey creation and destruction */
struct mutex mkey_mtx;
};
struct mlx5_vdpa_resources {
u32 pdn;
struct mlx5_uars_page *uar;
void __iomem *kick_addr;
u16 uid;
u32 null_mkey;
bool valid;
};
struct mlx5_vdpa_dev {
struct vdpa_device vdev;
struct mlx5_core_dev *mdev;
struct mlx5_vdpa_resources res;
u64 mlx_features;
u64 actual_features;
u8 status;
u32 max_vqs;
u32 generation;
struct mlx5_vdpa_mr mr;
};
int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
int mlx5_vdpa_dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid);
int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey);
int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn);
int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn);
void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn);
int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn);
void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn);
int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn);
void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn);
int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev);
void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev);
int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in,
int inlen);
int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey);
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
bool *change_map);
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb);
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
#define mlx5_vdpa_warn(__dev, format, ...) \
dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \
current->pid, ##__VA_ARGS__)
#define mlx5_vdpa_info(__dev, format, ...) \
dev_info((__dev)->mdev->device, "%s:%d:(pid %d): " format, __func__, __LINE__, \
current->pid, ##__VA_ARGS__)
#define mlx5_vdpa_dbg(__dev, format, ...) \
dev_debug((__dev)->mdev->device, "%s:%d:(pid %d): " format, __func__, __LINE__, \
current->pid, ##__VA_ARGS__)
#endif /* __MLX5_VDPA_H__ */

View file

@ -0,0 +1,168 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
#ifndef __MLX5_VDPA_IFC_H_
#define __MLX5_VDPA_IFC_H_
#include <linux/mlx5/mlx5_ifc.h>
enum {
MLX5_VIRTIO_Q_EVENT_MODE_NO_MSIX_MODE = 0x0,
MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE = 0x1,
MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE = 0x2,
};
enum {
MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps?
MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2,
};
enum {
MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0,
MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1,
};
struct mlx5_ifc_virtio_q_bits {
u8 virtio_q_type[0x8];
u8 reserved_at_8[0x5];
u8 event_mode[0x3];
u8 queue_index[0x10];
u8 full_emulation[0x1];
u8 virtio_version_1_0[0x1];
u8 reserved_at_22[0x2];
u8 offload_type[0x4];
u8 event_qpn_or_msix[0x18];
u8 doorbell_stride_index[0x10];
u8 queue_size[0x10];
u8 device_emulation_id[0x20];
u8 desc_addr[0x40];
u8 used_addr[0x40];
u8 available_addr[0x40];
u8 virtio_q_mkey[0x20];
u8 max_tunnel_desc[0x10];
u8 reserved_at_170[0x8];
u8 error_type[0x8];
u8 umem_1_id[0x20];
u8 umem_1_size[0x20];
u8 umem_1_offset[0x40];
u8 umem_2_id[0x20];
u8 umem_2_size[0x20];
u8 umem_2_offset[0x40];
u8 umem_3_id[0x20];
u8 umem_3_size[0x20];
u8 umem_3_offset[0x40];
u8 counter_set_id[0x20];
u8 reserved_at_320[0x8];
u8 pd[0x18];
u8 reserved_at_340[0xc0];
};
struct mlx5_ifc_virtio_net_q_object_bits {
u8 modify_field_select[0x40];
u8 reserved_at_40[0x20];
u8 vhca_id[0x10];
u8 reserved_at_70[0x10];
u8 queue_feature_bit_mask_12_3[0xa];
u8 dirty_bitmap_dump_enable[0x1];
u8 vhost_log_page[0x5];
u8 reserved_at_90[0xc];
u8 state[0x4];
u8 reserved_at_a0[0x5];
u8 queue_feature_bit_mask_2_0[0x3];
u8 tisn_or_qpn[0x18];
u8 dirty_bitmap_mkey[0x20];
u8 dirty_bitmap_size[0x20];
u8 dirty_bitmap_addr[0x40];
u8 hw_available_index[0x10];
u8 hw_used_index[0x10];
u8 reserved_at_160[0xa0];
struct mlx5_ifc_virtio_q_bits virtio_q_context;
};
struct mlx5_ifc_create_virtio_net_q_in_bits {
struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
struct mlx5_ifc_virtio_net_q_object_bits obj_context;
};
struct mlx5_ifc_create_virtio_net_q_out_bits {
struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
};
struct mlx5_ifc_destroy_virtio_net_q_in_bits {
struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_out_cmd_hdr;
};
struct mlx5_ifc_destroy_virtio_net_q_out_bits {
struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
};
struct mlx5_ifc_query_virtio_net_q_in_bits {
struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
};
struct mlx5_ifc_query_virtio_net_q_out_bits {
struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
struct mlx5_ifc_virtio_net_q_object_bits obj_context;
};
enum {
MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0,
MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3,
MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4,
};
enum {
MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT = 0x0,
MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY = 0x1,
MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND = 0x2,
MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3,
};
enum {
MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0,
MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1,
};
struct mlx5_ifc_modify_virtio_net_q_in_bits {
struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
struct mlx5_ifc_virtio_net_q_object_bits obj_context;
};
struct mlx5_ifc_modify_virtio_net_q_out_bits {
struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
};
#endif /* __MLX5_VDPA_IFC_H_ */

486
drivers/vdpa/mlx5/core/mr.c Normal file
View file

@ -0,0 +1,486 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"
/* DIV_ROUND_UP where the divider is a power of 2 give by its log base 2 value */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
u64 __s = _s; \
u64 _res; \
_res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
_res; \
})
static int get_octo_len(u64 len, int page_shift)
{
u64 page_size = 1ULL << page_shift;
int npages;
npages = ALIGN(len, page_size) >> page_shift;
return (npages + 1) / 2;
}
static void fill_sg(struct mlx5_vdpa_direct_mr *mr, void *in)
{
struct scatterlist *sg;
__be64 *pas;
int i;
pas = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i)
(*pas) = cpu_to_be64(sg_dma_address(sg));
}
static void mlx5_set_access_mode(void *mkc, int mode)
{
MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}
static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
struct scatterlist *sg;
int i;
for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i)
mtt[i] = cpu_to_be64(sg_dma_address(sg));
}
static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
int inlen;
void *mkc;
void *in;
int err;
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
fill_sg(mr, in);
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
MLX5_SET64(mkc, mkc, start_addr, mr->offset);
MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
MLX5_SET(mkc, mkc, translations_octword_size,
get_octo_len(mr->end - mr->start, mr->log_size));
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
get_octo_len(mr->end - mr->start, mr->log_size));
populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
kvfree(in);
if (err) {
mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
return err;
}
return 0;
}
static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
mlx5_vdpa_destroy_mkey(mvdev, &mr->mr);
}
static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
return max_t(u64, map->start, mr->start);
}
static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
return min_t(u64, map->last + 1, mr->end);
}
static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
return map_end(map, mr) - map_start(map, mr);
}
#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)
static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
struct mlx5_vdpa_direct_mr *s;
s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
if (!s)
return MLX5_VDPA_INVALID_START_ADDR;
return s->start;
}
static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
struct mlx5_vdpa_direct_mr *s;
struct mlx5_vdpa_direct_mr *e;
s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
if (!s)
return MLX5_VDPA_INVALID_LEN;
e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);
return e->end - s->start;
}
#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)
static u32 klm_bcount(u64 size)
{
return (u32)size;
}
static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
struct mlx5_vdpa_direct_mr *dmr;
struct mlx5_klm *klmarr;
struct mlx5_klm *klm;
bool first = true;
u64 preve;
int i;
klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
i = 0;
list_for_each_entry(dmr, &mkey->head, list) {
again:
klm = &klmarr[i++];
if (first) {
preve = dmr->start;
first = false;
}
if (preve == dmr->start) {
klm->key = cpu_to_be32(dmr->mr.key);
klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
preve = dmr->end;
} else {
klm->key = cpu_to_be32(mvdev->res.null_mkey);
klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
preve = dmr->start;
goto again;
}
}
}
static int klm_byte_size(int nklms)
{
return 16 * ALIGN(nklms, 4);
}
static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
int inlen;
void *mkc;
void *in;
int err;
u64 start;
u64 len;
start = indir_start_addr(mr);
len = indir_len(mr);
if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
return -EINVAL;
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
MLX5_SET64(mkc, mkc, start_addr, start);
MLX5_SET64(mkc, mkc, len, len);
MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
fill_indir(mvdev, mr, in);
err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
kfree(in);
return err;
}
static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
}
static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
struct vhost_iotlb *iotlb)
{
struct vhost_iotlb_map *map;
unsigned long lgcd = 0;
int log_entity_size;
unsigned long size;
u64 start = 0;
int err;
struct page *pg;
unsigned int nsg;
int sglen;
u64 pa;
u64 paend;
struct scatterlist *sg;
struct device *dma = mvdev->mdev->device;
int ret;
for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
size = maplen(map, mr);
lgcd = gcd(lgcd, size);
start += size;
}
log_entity_size = ilog2(lgcd);
sglen = 1 << log_entity_size;
nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);
err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
if (err)
return err;
sg = mr->sg_head.sgl;
for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
paend = map->addr + maplen(map, mr);
for (pa = map->addr; pa < paend; pa += sglen) {
pg = pfn_to_page(__phys_to_pfn(pa));
if (!sg) {
mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
map->start, map->last + 1);
err = -ENOMEM;
goto err_map;
}
sg_set_page(sg, pg, sglen, 0);
sg = sg_next(sg);
if (!sg)
goto done;
}
}
done:
mr->log_size = log_entity_size;
mr->nsg = nsg;
ret = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
if (!ret)
goto err_map;
err = create_direct_mr(mvdev, mr);
if (err)
goto err_direct;
return 0;
err_direct:
dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
err_map:
sg_free_table(&mr->sg_head);
return err;
}
static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
struct device *dma = mvdev->mdev->device;
destroy_direct_mr(mvdev, mr);
dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
sg_free_table(&mr->sg_head);
}
static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
struct vhost_iotlb *iotlb)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
struct mlx5_vdpa_direct_mr *dmr;
struct mlx5_vdpa_direct_mr *n;
LIST_HEAD(tmp);
u64 st;
u64 sz;
int err;
int i = 0;
st = start;
while (size) {
sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
if (!dmr) {
err = -ENOMEM;
goto err_alloc;
}
dmr->start = st;
dmr->end = st + sz;
dmr->perm = perm;
err = map_direct_mr(mvdev, dmr, iotlb);
if (err) {
kfree(dmr);
goto err_alloc;
}
list_add_tail(&dmr->list, &tmp);
size -= sz;
mr->num_directs++;
mr->num_klms++;
st += sz;
i++;
}
list_splice_tail(&tmp, &mr->head);
return 0;
err_alloc:
list_for_each_entry_safe(dmr, n, &mr->head, list) {
list_del_init(&dmr->list);
unmap_direct_mr(mvdev, dmr);
kfree(dmr);
}
return err;
}
/* The iotlb pointer contains a list of maps. Go over the maps, possibly
* merging mergeable maps, and create direct memory keys that provide the
* device access to memory. The direct mkeys are then referred to by the
* indirect memory key that provides access to the enitre address space given
* by iotlb.
*/
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
struct mlx5_vdpa_direct_mr *dmr;
struct mlx5_vdpa_direct_mr *n;
struct vhost_iotlb_map *map;
u32 pperm = U16_MAX;
u64 last = U64_MAX;
u64 ps = U64_MAX;
u64 pe = U64_MAX;
u64 start = 0;
int err = 0;
int nnuls;
if (mr->initialized)
return 0;
INIT_LIST_HEAD(&mr->head);
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
map = vhost_iotlb_itree_next(map, start, last)) {
start = map->start;
if (pe == map->start && pperm == map->perm) {
pe = map->last + 1;
} else {
if (ps != U64_MAX) {
if (pe < map->start) {
/* We have a hole in the map. Check how
* many null keys are required to fill it.
*/
nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
LOG_MAX_KLM_SIZE);
mr->num_klms += nnuls;
}
err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
if (err)
goto err_chain;
}
ps = map->start;
pe = map->last + 1;
pperm = map->perm;
}
}
err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
if (err)
goto err_chain;
/* Create the memory key that defines the guests's address space. This
* memory key refers to the direct keys that contain the MTT
* translations
*/
err = create_indirect_key(mvdev, mr);
if (err)
goto err_chain;
mr->initialized = true;
return 0;
err_chain:
list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
list_del_init(&dmr->list);
unmap_direct_mr(mvdev, dmr);
kfree(dmr);
}
return err;
}
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err;
mutex_lock(&mr->mkey_mtx);
err = _mlx5_vdpa_create_mr(mvdev, iotlb);
mutex_unlock(&mr->mkey_mtx);
return err;
}
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
struct mlx5_vdpa_direct_mr *dmr;
struct mlx5_vdpa_direct_mr *n;
mutex_lock(&mr->mkey_mtx);
if (!mr->initialized)
goto out;
destroy_indirect_key(mvdev, mr);
list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
list_del_init(&dmr->list);
unmap_direct_mr(mvdev, dmr);
kfree(dmr);
}
memset(mr, 0, sizeof(*mr));
mr->initialized = false;
out:
mutex_unlock(&mr->mkey_mtx);
}
static bool map_empty(struct vhost_iotlb *iotlb)
{
return !vhost_iotlb_itree_first(iotlb, 0, U64_MAX);
}
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
bool *change_map)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err = 0;
*change_map = false;
if (map_empty(iotlb)) {
mlx5_vdpa_destroy_mr(mvdev);
return 0;
}
mutex_lock(&mr->mkey_mtx);
if (mr->initialized) {
mlx5_vdpa_info(mvdev, "memory map update\n");
*change_map = true;
}
if (!*change_map)
err = _mlx5_vdpa_create_mr(mvdev, iotlb);
mutex_unlock(&mr->mkey_mtx);
return err;
}

View file

@ -0,0 +1,284 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
#include <linux/mlx5/driver.h>
#include "mlx5_vdpa.h"
static int alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid)
{
struct mlx5_core_dev *mdev = dev->mdev;
u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
int err;
MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
MLX5_SET(alloc_pd_in, in, uid, uid);
err = mlx5_cmd_exec_inout(mdev, alloc_pd, in, out);
if (!err)
*pdn = MLX5_GET(alloc_pd_out, out, pd);
return err;
}
static int dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
struct mlx5_core_dev *mdev = dev->mdev;
MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
MLX5_SET(dealloc_pd_in, in, pd, pdn);
MLX5_SET(dealloc_pd_in, in, uid, uid);
return mlx5_cmd_exec_in(mdev, dealloc_pd, in);
}
static int get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey)
{
u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
struct mlx5_core_dev *mdev = dev->mdev;
int err;
MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
err = mlx5_cmd_exec_inout(mdev, query_special_contexts, in, out);
if (!err)
*null_mkey = MLX5_GET(query_special_contexts_out, out, null_mkey);
return err;
}
static int create_uctx(struct mlx5_vdpa_dev *mvdev, u16 *uid)
{
u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
int inlen;
void *in;
int err;
/* 0 means not supported */
if (!MLX5_CAP_GEN(mvdev->mdev, log_max_uctx))
return -EOPNOTSUPP;
inlen = MLX5_ST_SZ_BYTES(create_uctx_in);
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
MLX5_SET(create_uctx_in, in, uctx.cap, MLX5_UCTX_CAP_RAW_TX);
err = mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out));
kfree(in);
if (!err)
*uid = MLX5_GET(create_uctx_out, out, uid);
return err;
}
static void destroy_uctx(struct mlx5_vdpa_dev *mvdev, u32 uid)
{
u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {};
u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
MLX5_SET(destroy_uctx_in, in, uid, uid);
mlx5_cmd_exec(mvdev->mdev, in, sizeof(in), out, sizeof(out));
}
int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn)
{
u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {};
int err;
MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
MLX5_SET(create_tis_in, in, uid, mvdev->res.uid);
err = mlx5_cmd_exec_inout(mvdev->mdev, create_tis, in, out);
if (!err)
*tisn = MLX5_GET(create_tis_out, out, tisn);
return err;
}
void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn)
{
u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
MLX5_SET(destroy_tis_in, in, uid, mvdev->res.uid);
MLX5_SET(destroy_tis_in, in, tisn, tisn);
mlx5_cmd_exec_in(mvdev->mdev, destroy_tis, in);
}
int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn)
{
u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {};
int err;
MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
err = mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out));
if (!err)
*rqtn = MLX5_GET(create_rqt_out, out, rqtn);
return err;
}
void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn)
{
u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
MLX5_SET(destroy_rqt_in, in, uid, mvdev->res.uid);
MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
mlx5_cmd_exec_in(mvdev->mdev, destroy_rqt, in);
}
int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn)
{
u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
int err;
MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
err = mlx5_cmd_exec_inout(mvdev->mdev, create_tir, in, out);
if (!err)
*tirn = MLX5_GET(create_tir_out, out, tirn);
return err;
}
void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn)
{
u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
MLX5_SET(destroy_tir_in, in, uid, mvdev->res.uid);
MLX5_SET(destroy_tir_in, in, tirn, tirn);
mlx5_cmd_exec_in(mvdev->mdev, destroy_tir, in);
}
int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn)
{
u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {};
int err;
MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
MLX5_SET(alloc_transport_domain_in, in, uid, mvdev->res.uid);
err = mlx5_cmd_exec_inout(mvdev->mdev, alloc_transport_domain, in, out);
if (!err)
*tdn = MLX5_GET(alloc_transport_domain_out, out, transport_domain);
return err;
}
void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn)
{
u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {};
MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
MLX5_SET(dealloc_transport_domain_in, in, uid, mvdev->res.uid);
MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
mlx5_cmd_exec_in(mvdev->mdev, dealloc_transport_domain, in);
}
int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in,
int inlen)
{
u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
u32 mkey_index;
void *mkc;
int err;
MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
err = mlx5_cmd_exec(mvdev->mdev, in, inlen, lout, sizeof(lout));
if (err)
return err;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
mkey->size = MLX5_GET64(mkc, mkc, len);
mkey->key |= mlx5_idx_to_mkey(mkey_index);
mkey->pd = MLX5_GET(mkc, mkc, pd);
return 0;
}
int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey)
{
u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {};
MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
}
int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
{
u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
struct mlx5_vdpa_resources *res = &mvdev->res;
struct mlx5_core_dev *mdev = mvdev->mdev;
u64 kick_addr;
int err;
if (res->valid) {
mlx5_vdpa_warn(mvdev, "resources already allocated\n");
return -EINVAL;
}
mutex_init(&mvdev->mr.mkey_mtx);
res->uar = mlx5_get_uars_page(mdev);
if (IS_ERR(res->uar)) {
err = PTR_ERR(res->uar);
goto err_uars;
}
err = create_uctx(mvdev, &res->uid);
if (err)
goto err_uctx;
err = alloc_pd(mvdev, &res->pdn, res->uid);
if (err)
goto err_pd;
err = get_null_mkey(mvdev, &res->null_mkey);
if (err)
goto err_key;
kick_addr = pci_resource_start(mdev->pdev, 0) + offset;
res->kick_addr = ioremap(kick_addr, PAGE_SIZE);
if (!res->kick_addr) {
err = -ENOMEM;
goto err_key;
}
res->valid = true;
return 0;
err_key:
dealloc_pd(mvdev, res->pdn, res->uid);
err_pd:
destroy_uctx(mvdev, res->uid);
err_uctx:
mlx5_put_uars_page(mdev, res->uar);
err_uars:
mutex_destroy(&mvdev->mr.mkey_mtx);
return err;
}
void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
{
struct mlx5_vdpa_resources *res = &mvdev->res;
if (!res->valid)
return;
iounmap(res->kick_addr);
res->kick_addr = NULL;
dealloc_pd(mvdev, res->pdn, res->uid);
destroy_uctx(mvdev, res->uid);
mlx5_put_uars_page(mvdev->mdev, res->uar);
mutex_destroy(&mvdev->mr.mkey_mtx);
res->valid = false;
}

View file

@ -0,0 +1,76 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
#include "mlx5_vdpa_ifc.h"
#include "mlx5_vnet.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");
static bool required_caps_supported(struct mlx5_core_dev *mdev)
{
u8 event_mode;
u64 got;
got = MLX5_CAP_GEN_64(mdev, general_obj_types);
if (!(got & MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q))
return false;
event_mode = MLX5_CAP_DEV_VDPA_EMULATION(mdev, event_mode);
if (!(event_mode & MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE))
return false;
if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, eth_frame_offload_type))
return false;
return true;
}
static void *mlx5_vdpa_add(struct mlx5_core_dev *mdev)
{
struct mlx5_vdpa_dev *vdev;
if (mlx5_core_is_pf(mdev))
return NULL;
if (!required_caps_supported(mdev)) {
dev_info(mdev->device, "virtio net emulation not supported\n");
return NULL;
}
vdev = mlx5_vdpa_add_dev(mdev);
if (IS_ERR(vdev))
return NULL;
return vdev;
}
static void mlx5_vdpa_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_vdpa_dev *vdev = context;
mlx5_vdpa_remove_dev(vdev);
}
static struct mlx5_interface mlx5_vdpa_interface = {
.add = mlx5_vdpa_add,
.remove = mlx5_vdpa_remove,
.protocol = MLX5_INTERFACE_PROTOCOL_VDPA,
};
static int __init mlx5_vdpa_init(void)
{
return mlx5_register_interface(&mlx5_vdpa_interface);
}
static void __exit mlx5_vdpa_exit(void)
{
mlx5_unregister_interface(&mlx5_vdpa_interface);
}
module_init(mlx5_vdpa_init);
module_exit(mlx5_vdpa_exit);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
#ifndef __MLX5_VNET_H_
#define __MLX5_VNET_H_
#include <linux/vdpa.h>
#include <linux/virtio_net.h>
#include <linux/vringh.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"
static inline u32 mlx5_vdpa_max_qps(int max_vqs)
{
return max_vqs / 2;
}
#define to_mlx5_vdpa_ndev(__mvdev) container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev);
void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev);
#endif /* __MLX5_VNET_H_ */

View file

@ -61,6 +61,7 @@ static void vdpa_release_dev(struct device *d)
* initialized but before registered.
* @parent: the parent device
* @config: the bus operations that is supported by this device
* @nvqs: number of virtqueues supported by this device
* @size: size of the parent structure that contains private data
*
* Driver should use vdpa_alloc_device() wrapper macro instead of
@ -71,6 +72,7 @@ static void vdpa_release_dev(struct device *d)
*/
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
int nvqs,
size_t size)
{
struct vdpa_device *vdev;
@ -96,6 +98,8 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
vdev->dev.release = vdpa_release_dev;
vdev->index = err;
vdev->config = config;
vdev->features_valid = false;
vdev->nvqs = nvqs;
err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index);
if (err)

View file

@ -24,6 +24,7 @@
#include <linux/etherdevice.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/virtio_byteorder.h>
#include <linux/vhost_iotlb.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_net.h>
@ -33,6 +34,10 @@
#define DRV_DESC "vDPA Device Simulator"
#define DRV_LICENSE "GPL v2"
static int batch_mapping = 1;
module_param(batch_mapping, int, 0444);
MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
struct vdpasim_virtqueue {
struct vringh vring;
struct vringh_kiov iov;
@ -55,12 +60,12 @@ struct vdpasim_virtqueue {
static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
(1ULL << VIRTIO_F_VERSION_1) |
(1ULL << VIRTIO_F_IOMMU_PLATFORM);
(1ULL << VIRTIO_F_ACCESS_PLATFORM);
/* State of each vdpasim device */
struct vdpasim {
struct vdpa_device vdpa;
struct vdpasim_virtqueue vqs[2];
struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM];
struct work_struct work;
/* spinlock to synchronize virtqueue state */
spinlock_t lock;
@ -70,8 +75,27 @@ struct vdpasim {
u32 status;
u32 generation;
u64 features;
/* spinlock to synchronize iommu table */
spinlock_t iommu_lock;
};
/* TODO: cross-endian support */
static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
{
return virtio_legacy_is_little_endian() ||
(vdpasim->features & (1ULL << VIRTIO_F_VERSION_1));
}
static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val)
{
return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val);
}
static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val)
{
return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val);
}
static struct vdpasim *vdpasim_dev;
static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
@ -118,7 +142,9 @@ static void vdpasim_reset(struct vdpasim *vdpasim)
for (i = 0; i < VDPASIM_VQ_NUM; i++)
vdpasim_vq_reset(&vdpasim->vqs[i]);
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_reset(vdpasim->iommu);
spin_unlock(&vdpasim->iommu_lock);
vdpasim->features = 0;
vdpasim->status = 0;
@ -236,8 +262,10 @@ static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page,
/* For simplicity, use identical mapping to avoid e.g iova
* allocator.
*/
spin_lock(&vdpasim->iommu_lock);
ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1,
pa, dir_to_perm(dir));
spin_unlock(&vdpasim->iommu_lock);
if (ret)
return DMA_MAPPING_ERROR;
@ -251,8 +279,10 @@ static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
struct vdpasim *vdpasim = dev_to_sim(dev);
struct vhost_iotlb *iommu = vdpasim->iommu;
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_del_range(iommu, (u64)dma_addr,
(u64)dma_addr + size - 1);
spin_unlock(&vdpasim->iommu_lock);
}
static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
@ -264,9 +294,10 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
void *addr = kmalloc(size, flag);
int ret;
if (!addr)
spin_lock(&vdpasim->iommu_lock);
if (!addr) {
*dma_addr = DMA_MAPPING_ERROR;
else {
} else {
u64 pa = virt_to_phys(addr);
ret = vhost_iotlb_add_range(iommu, (u64)pa,
@ -279,6 +310,7 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
} else
*dma_addr = (dma_addr_t)pa;
}
spin_unlock(&vdpasim->iommu_lock);
return addr;
}
@ -290,8 +322,11 @@ static void vdpasim_free_coherent(struct device *dev, size_t size,
struct vdpasim *vdpasim = dev_to_sim(dev);
struct vhost_iotlb *iommu = vdpasim->iommu;
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_del_range(iommu, (u64)dma_addr,
(u64)dma_addr + size - 1);
spin_unlock(&vdpasim->iommu_lock);
kfree(phys_to_virt((uintptr_t)dma_addr));
}
@ -303,21 +338,27 @@ static const struct dma_map_ops vdpasim_dma_ops = {
};
static const struct vdpa_config_ops vdpasim_net_config_ops;
static const struct vdpa_config_ops vdpasim_net_batch_config_ops;
static struct vdpasim *vdpasim_create(void)
{
struct virtio_net_config *config;
const struct vdpa_config_ops *ops;
struct vdpasim *vdpasim;
struct device *dev;
int ret = -ENOMEM;
vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL,
&vdpasim_net_config_ops);
if (batch_mapping)
ops = &vdpasim_net_batch_config_ops;
else
ops = &vdpasim_net_config_ops;
vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM);
if (!vdpasim)
goto err_alloc;
INIT_WORK(&vdpasim->work, vdpasim_work);
spin_lock_init(&vdpasim->lock);
spin_lock_init(&vdpasim->iommu_lock);
dev = &vdpasim->vdpa.dev;
dev->coherent_dma_mask = DMA_BIT_MASK(64);
@ -331,10 +372,7 @@ static struct vdpasim *vdpasim_create(void)
if (!vdpasim->buffer)
goto err_iommu;
config = &vdpasim->config;
config->mtu = 1500;
config->status = VIRTIO_NET_S_LINK_UP;
eth_random_addr(config->mac);
eth_random_addr(vdpasim->config.mac);
vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
@ -413,26 +451,29 @@ static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
return vq->ready;
}
static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, u64 state)
static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx,
const struct vdpa_vq_state *state)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
struct vringh *vrh = &vq->vring;
spin_lock(&vdpasim->lock);
vrh->last_avail_idx = state;
vrh->last_avail_idx = state->avail_index;
spin_unlock(&vdpasim->lock);
return 0;
}
static u64 vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx)
static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vdpa_vq_state *state)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
struct vringh *vrh = &vq->vring;
return vrh->last_avail_idx;
state->avail_index = vrh->last_avail_idx;
return 0;
}
static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa)
@ -448,13 +489,22 @@ static u64 vdpasim_get_features(struct vdpa_device *vdpa)
static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
struct virtio_net_config *config = &vdpasim->config;
/* DMA mapping must be done by driver */
if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
return -EINVAL;
vdpasim->features = features & vdpasim_features;
/* We generally only know whether guest is using the legacy interface
* here, so generally that's the earliest we can set config fields.
* Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which
* implies VIRTIO_F_VERSION_1, but let's not try to be clever here.
*/
config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
return 0;
}
@ -508,7 +558,7 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
if (offset + len < sizeof(struct virtio_net_config))
memcpy(buf, &vdpasim->config + offset, len);
memcpy(buf, (u8 *)&vdpasim->config + offset, len);
}
static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset,
@ -532,6 +582,7 @@ static int vdpasim_set_map(struct vdpa_device *vdpa,
u64 start = 0ULL, last = 0ULL - 1;
int ret;
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_reset(vdpasim->iommu);
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
@ -541,10 +592,12 @@ static int vdpasim_set_map(struct vdpa_device *vdpa,
if (ret)
goto err;
}
spin_unlock(&vdpasim->iommu_lock);
return 0;
err:
vhost_iotlb_reset(vdpasim->iommu);
spin_unlock(&vdpasim->iommu_lock);
return ret;
}
@ -552,16 +605,23 @@ static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size,
u64 pa, u32 perm)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
int ret;
return vhost_iotlb_add_range(vdpasim->iommu, iova,
iova + size - 1, pa, perm);
spin_lock(&vdpasim->iommu_lock);
ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa,
perm);
spin_unlock(&vdpasim->iommu_lock);
return ret;
}
static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1);
spin_unlock(&vdpasim->iommu_lock);
return 0;
}
@ -597,12 +657,36 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = {
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
.set_map = vdpasim_set_map,
.dma_map = vdpasim_dma_map,
.dma_unmap = vdpasim_dma_unmap,
.free = vdpasim_free,
};
static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
.set_vq_address = vdpasim_set_vq_address,
.set_vq_num = vdpasim_set_vq_num,
.kick_vq = vdpasim_kick_vq,
.set_vq_cb = vdpasim_set_vq_cb,
.set_vq_ready = vdpasim_set_vq_ready,
.get_vq_ready = vdpasim_get_vq_ready,
.set_vq_state = vdpasim_set_vq_state,
.get_vq_state = vdpasim_get_vq_state,
.get_vq_align = vdpasim_get_vq_align,
.get_features = vdpasim_get_features,
.set_features = vdpasim_set_features,
.set_config_cb = vdpasim_set_config_cb,
.get_vq_num_max = vdpasim_get_vq_num_max,
.get_device_id = vdpasim_get_device_id,
.get_vendor_id = vdpasim_get_vendor_id,
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
.set_map = vdpasim_set_map,
.free = vdpasim_free,
};
static int __init vdpasim_dev_init(void)
{
vdpasim_dev = vdpasim_create();

View file

@ -65,6 +65,7 @@ config VHOST_VDPA
tristate "Vhost driver for vDPA-based backend"
depends on EVENTFD
select VHOST
select IRQ_BYPASS_MANAGER
depends on VDPA
help
This kernel module can be loaded in host kernel to accelerate

View file

@ -73,7 +73,7 @@ enum {
VHOST_NET_FEATURES = VHOST_FEATURES |
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
(1ULL << VIRTIO_F_IOMMU_PLATFORM)
(1ULL << VIRTIO_F_ACCESS_PLATFORM)
};
enum {
@ -1615,21 +1615,6 @@ done:
return err;
}
static int vhost_net_set_backend_features(struct vhost_net *n, u64 features)
{
int i;
mutex_lock(&n->dev.mutex);
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vq.acked_backend_features = features;
mutex_unlock(&n->vqs[i].vq.mutex);
}
mutex_unlock(&n->dev.mutex);
return 0;
}
static int vhost_net_set_features(struct vhost_net *n, u64 features)
{
size_t vhost_hlen, sock_hlen, hdr_len;
@ -1653,7 +1638,7 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
!vhost_log_access_ok(&n->dev))
goto out_unlock;
if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
if (vhost_init_device_iotlb(&n->dev, true))
goto out_unlock;
}
@ -1730,7 +1715,8 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
return -EFAULT;
if (features & ~VHOST_NET_BACKEND_FEATURES)
return -EOPNOTSUPP;
return vhost_net_set_backend_features(n, features);
vhost_set_backend_features(&n->dev, features);
return 0;
case VHOST_RESET_OWNER:
return vhost_net_reset_owner(n);
case VHOST_SET_OWNER:

View file

@ -27,37 +27,11 @@
#include "vhost.h"
enum {
VHOST_VDPA_FEATURES =
(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
(1ULL << VIRTIO_F_ANY_LAYOUT) |
(1ULL << VIRTIO_F_VERSION_1) |
(1ULL << VIRTIO_F_IOMMU_PLATFORM) |
(1ULL << VIRTIO_F_RING_PACKED) |
(1ULL << VIRTIO_F_ORDER_PLATFORM) |
(1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
(1ULL << VIRTIO_RING_F_EVENT_IDX),
VHOST_VDPA_NET_FEATURES = VHOST_VDPA_FEATURES |
(1ULL << VIRTIO_NET_F_CSUM) |
(1ULL << VIRTIO_NET_F_GUEST_CSUM) |
(1ULL << VIRTIO_NET_F_MTU) |
(1ULL << VIRTIO_NET_F_MAC) |
(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
(1ULL << VIRTIO_NET_F_GUEST_TSO6) |
(1ULL << VIRTIO_NET_F_GUEST_ECN) |
(1ULL << VIRTIO_NET_F_GUEST_UFO) |
(1ULL << VIRTIO_NET_F_HOST_TSO4) |
(1ULL << VIRTIO_NET_F_HOST_TSO6) |
(1ULL << VIRTIO_NET_F_HOST_ECN) |
(1ULL << VIRTIO_NET_F_HOST_UFO) |
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
(1ULL << VIRTIO_NET_F_STATUS) |
(1ULL << VIRTIO_NET_F_SPEED_DUPLEX),
VHOST_VDPA_BACKEND_FEATURES =
(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
};
/* Currently, only network backend w/o multiqueue is supported. */
#define VHOST_VDPA_VQ_MAX 2
#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
struct vhost_vdpa {
@ -73,16 +47,13 @@ struct vhost_vdpa {
int virtio_id;
int minor;
struct eventfd_ctx *config_ctx;
int in_batch;
};
static DEFINE_IDA(vhost_vdpa_ida);
static dev_t vhost_vdpa_major;
static const u64 vhost_vdpa_features[] = {
[VIRTIO_ID_NET] = VHOST_VDPA_NET_FEATURES,
};
static void handle_vq_kick(struct vhost_work *work)
{
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@ -96,7 +67,7 @@ static void handle_vq_kick(struct vhost_work *work)
static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
struct vhost_virtqueue *vq = private;
struct eventfd_ctx *call_ctx = vq->call_ctx;
struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
if (call_ctx)
eventfd_signal(call_ctx, 1);
@ -115,12 +86,45 @@ static irqreturn_t vhost_vdpa_config_cb(void *private)
return IRQ_HANDLED;
}
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
struct vhost_virtqueue *vq = &v->vqs[qid];
const struct vdpa_config_ops *ops = v->vdpa->config;
struct vdpa_device *vdpa = v->vdpa;
int ret, irq;
if (!ops->get_vq_irq)
return;
irq = ops->get_vq_irq(vdpa, qid);
spin_lock(&vq->call_ctx.ctx_lock);
irq_bypass_unregister_producer(&vq->call_ctx.producer);
if (!vq->call_ctx.ctx || irq < 0) {
spin_unlock(&vq->call_ctx.ctx_lock);
return;
}
vq->call_ctx.producer.token = vq->call_ctx.ctx;
vq->call_ctx.producer.irq = irq;
ret = irq_bypass_register_producer(&vq->call_ctx.producer);
spin_unlock(&vq->call_ctx.ctx_lock);
}
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
struct vhost_virtqueue *vq = &v->vqs[qid];
spin_lock(&vq->call_ctx.ctx_lock);
irq_bypass_unregister_producer(&vq->call_ctx.producer);
spin_unlock(&vq->call_ctx.ctx_lock);
}
static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
ops->set_status(vdpa, 0);
vdpa_reset(vdpa);
v->in_batch = 0;
}
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
@ -155,11 +159,15 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u8 status;
u8 status, status_old;
int nvqs = v->nvqs;
u16 i;
if (copy_from_user(&status, statusp, sizeof(status)))
return -EFAULT;
status_old = ops->get_status(vdpa);
/*
* Userspace shouldn't remove status bits unless reset the
* status to 0.
@ -169,6 +177,14 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
ops->set_status(vdpa, status);
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
for (i = 0; i < nvqs; i++)
vhost_vdpa_setup_vq_irq(v, i);
if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
for (i = 0; i < nvqs; i++)
vhost_vdpa_unsetup_vq_irq(v, i);
return 0;
}
@ -196,7 +212,6 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v,
struct vhost_vdpa_config __user *c)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_vdpa_config config;
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
u8 *buf;
@ -209,7 +224,7 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v,
if (!buf)
return -ENOMEM;
ops->get_config(vdpa, config.off, buf, config.len);
vdpa_get_config(vdpa, config.off, buf, config.len);
if (copy_to_user(c->buf, buf, config.len)) {
kvfree(buf);
@ -255,7 +270,6 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
u64 features;
features = ops->get_features(vdpa);
features &= vhost_vdpa_features[v->virtio_id];
if (copy_to_user(featurep, &features, sizeof(features)))
return -EFAULT;
@ -279,10 +293,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
if (features & ~vhost_vdpa_features[v->virtio_id])
return -EINVAL;
if (ops->set_features(vdpa, features))
if (vdpa_set_features(vdpa, features))
return -EINVAL;
return 0;
@ -332,14 +343,18 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
return 0;
}
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vdpa_vq_state vq_state;
struct vdpa_callback cb;
struct vhost_virtqueue *vq;
struct vhost_vring_state s;
u64 __user *featurep = argp;
u64 features;
u32 idx;
long r;
@ -353,15 +368,32 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
idx = array_index_nospec(idx, v->nvqs);
vq = &v->vqs[idx];
if (cmd == VHOST_VDPA_SET_VRING_ENABLE) {
switch (cmd) {
case VHOST_VDPA_SET_VRING_ENABLE:
if (copy_from_user(&s, argp, sizeof(s)))
return -EFAULT;
ops->set_vq_ready(vdpa, idx, s.num);
return 0;
}
case VHOST_GET_VRING_BASE:
r = ops->get_vq_state(v->vdpa, idx, &vq_state);
if (r)
return r;
if (cmd == VHOST_GET_VRING_BASE)
vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx);
vq->last_avail_idx = vq_state.avail_index;
break;
case VHOST_GET_BACKEND_FEATURES:
features = VHOST_VDPA_BACKEND_FEATURES;
if (copy_to_user(featurep, &features, sizeof(features)))
return -EFAULT;
return 0;
case VHOST_SET_BACKEND_FEATURES:
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
if (features & ~VHOST_VDPA_BACKEND_FEATURES)
return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features);
return 0;
}
r = vhost_vring_ioctl(&v->vdev, cmd, argp);
if (r)
@ -377,12 +409,13 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
break;
case VHOST_SET_VRING_BASE:
if (ops->set_vq_state(vdpa, idx, vq->last_avail_idx))
vq_state.avail_index = vq->last_avail_idx;
if (ops->set_vq_state(vdpa, idx, &vq_state))
r = -EINVAL;
break;
case VHOST_SET_VRING_CALL:
if (vq->call_ctx) {
if (vq->call_ctx.ctx) {
cb.callback = vhost_vdpa_virtqueue_cb;
cb.private = vq;
} else {
@ -390,6 +423,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
cb.private = NULL;
}
ops->set_vq_cb(vdpa, idx, &cb);
vhost_vdpa_setup_vq_irq(v, idx);
break;
case VHOST_SET_VRING_NUM:
@ -519,13 +553,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
if (r)
return r;
if (ops->dma_map)
if (ops->dma_map) {
r = ops->dma_map(vdpa, iova, size, pa, perm);
else if (ops->set_map)
r = ops->set_map(vdpa, dev->iotlb);
else
} else if (ops->set_map) {
if (!v->in_batch)
r = ops->set_map(vdpa, dev->iotlb);
} else {
r = iommu_map(v->domain, iova, pa, size,
perm_to_iommu_flags(perm));
}
return r;
}
@ -538,12 +574,14 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);
if (ops->dma_map)
if (ops->dma_map) {
ops->dma_unmap(vdpa, iova, size);
else if (ops->set_map)
ops->set_map(vdpa, dev->iotlb);
else
} else if (ops->set_map) {
if (!v->in_batch)
ops->set_map(vdpa, dev->iotlb);
} else {
iommu_unmap(v->domain, iova, size);
}
}
static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
@ -636,6 +674,8 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg)
{
struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
int r = 0;
r = vhost_dev_check_owner(dev);
@ -649,6 +689,14 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
case VHOST_IOTLB_INVALIDATE:
vhost_vdpa_unmap(v, msg->iova, msg->size);
break;
case VHOST_IOTLB_BATCH_BEGIN:
v->in_batch = true;
break;
case VHOST_IOTLB_BATCH_END:
if (v->in_batch && ops->set_map)
ops->set_map(vdpa, dev->iotlb);
v->in_batch = false;
break;
default:
r = -EINVAL;
break;
@ -765,6 +813,18 @@ err:
return r;
}
static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
struct vhost_virtqueue *vq;
int i;
for (i = 0; i < v->nvqs; i++) {
vq = &v->vqs[i];
if (vq->call_ctx.producer.irq)
irq_bypass_unregister_producer(&vq->call_ctx.producer);
}
}
static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
struct vhost_vdpa *v = filep->private_data;
@ -777,6 +837,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep)
vhost_vdpa_iotlb_free(v);
vhost_vdpa_free_domain(v);
vhost_vdpa_config_put(v);
vhost_vdpa_clean_irq(v);
vhost_dev_cleanup(&v->vdev);
kfree(v->vdev.vqs);
mutex_unlock(&d->mutex);
@ -872,7 +933,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_vdpa *v;
int minor, nvqs = VHOST_VDPA_VQ_MAX;
int minor;
int r;
/* Currently, we only accept the network devices. */
@ -893,14 +954,14 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
atomic_set(&v->opened, 0);
v->minor = minor;
v->vdpa = vdpa;
v->nvqs = nvqs;
v->nvqs = vdpa->nvqs;
v->virtio_id = ops->get_device_id(vdpa);
device_initialize(&v->dev);
v->dev.release = vhost_vdpa_release_dev;
v->dev.parent = &vdpa->dev;
v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
v->vqs = kmalloc_array(nvqs, sizeof(struct vhost_virtqueue),
v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
GFP_KERNEL);
if (!v->vqs) {
r = -ENOMEM;

View file

@ -298,6 +298,13 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
__vhost_vq_meta_reset(d->vqs[i]);
}
static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx)
{
call_ctx->ctx = NULL;
memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer));
spin_lock_init(&call_ctx->ctx_lock);
}
static void vhost_vq_reset(struct vhost_dev *dev,
struct vhost_virtqueue *vq)
{
@ -319,13 +326,13 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->log_base = NULL;
vq->error_ctx = NULL;
vq->kick = NULL;
vq->call_ctx = NULL;
vq->log_ctx = NULL;
vhost_reset_is_le(vq);
vhost_disable_cross_endian(vq);
vq->busyloop_timeout = 0;
vq->umem = NULL;
vq->iotlb = NULL;
vhost_vring_call_reset(&vq->call_ctx);
__vhost_vq_meta_reset(vq);
}
@ -685,8 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
eventfd_ctx_put(dev->vqs[i]->error_ctx);
if (dev->vqs[i]->kick)
fput(dev->vqs[i]->kick);
if (dev->vqs[i]->call_ctx)
eventfd_ctx_put(dev->vqs[i]->call_ctx);
if (dev->vqs[i]->call_ctx.ctx)
eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
vhost_vq_reset(dev, dev->vqs[i]);
}
vhost_dev_free_iovecs(dev);
@ -1405,7 +1412,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
memcpy(newmem, &mem, size);
if (copy_from_user(newmem->regions, m->regions,
mem.nregions * sizeof *m->regions)) {
flex_array_size(newmem, regions, mem.nregions))) {
kvfree(newmem);
return -EFAULT;
}
@ -1629,7 +1636,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
r = PTR_ERR(ctx);
break;
}
swap(ctx, vq->call_ctx);
spin_lock(&vq->call_ctx.ctx_lock);
swap(ctx, vq->call_ctx.ctx);
spin_unlock(&vq->call_ctx.ctx_lock);
break;
case VHOST_SET_VRING_ERR:
if (copy_from_user(&f, argp, sizeof f)) {
@ -2435,8 +2445,8 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
/* Signal the Guest tell them we used something up. */
if (vq->call_ctx && vhost_notify(dev, vq))
eventfd_signal(vq->call_ctx, 1);
if (vq->call_ctx.ctx && vhost_notify(dev, vq))
eventfd_signal(vq->call_ctx.ctx, 1);
}
EXPORT_SYMBOL_GPL(vhost_signal);
@ -2576,6 +2586,21 @@ struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
}
EXPORT_SYMBOL_GPL(vhost_dequeue_msg);
void vhost_set_backend_features(struct vhost_dev *dev, u64 features)
{
struct vhost_virtqueue *vq;
int i;
mutex_lock(&dev->mutex);
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
mutex_lock(&vq->mutex);
vq->acked_backend_features = features;
mutex_unlock(&vq->mutex);
}
mutex_unlock(&dev->mutex);
}
EXPORT_SYMBOL_GPL(vhost_set_backend_features);
static int __init vhost_init(void)
{

View file

@ -13,6 +13,7 @@
#include <linux/virtio_ring.h>
#include <linux/atomic.h>
#include <linux/vhost_iotlb.h>
#include <linux/irqbypass.h>
struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@ -60,6 +61,12 @@ enum vhost_uaddr_type {
VHOST_NUM_ADDRS = 3,
};
struct vhost_vring_call {
struct eventfd_ctx *ctx;
struct irq_bypass_producer producer;
spinlock_t ctx_lock;
};
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@ -72,7 +79,7 @@ struct vhost_virtqueue {
vring_used_t __user *used;
const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
struct eventfd_ctx *call_ctx;
struct vhost_vring_call call_ctx;
struct eventfd_ctx *error_ctx;
struct eventfd_ctx *log_ctx;
@ -207,6 +214,8 @@ void vhost_enqueue_msg(struct vhost_dev *dev,
struct vhost_msg_node *node);
struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
struct list_head *head);
void vhost_set_backend_features(struct vhost_dev *dev, u64 features);
__poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev,
poll_table *wait);
ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,

View file

@ -398,12 +398,9 @@ static inline s64 towards_target(struct virtio_balloon *vb)
s64 target;
u32 num_pages;
virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages,
&num_pages);
/* Legacy balloon config space is LE, unlike all other devices. */
if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
num_pages = le32_to_cpu((__force __le32)num_pages);
virtio_cread_le(vb->vdev, struct virtio_balloon_config, num_pages,
&num_pages);
target = num_pages;
return target - vb->num_pages;
@ -462,11 +459,8 @@ static void update_balloon_size(struct virtio_balloon *vb)
u32 actual = vb->num_pages;
/* Legacy balloon config space is LE, unlike all other devices. */
if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
actual = (__force u32)cpu_to_le32(actual);
virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
&actual);
virtio_cwrite_le(vb->vdev, struct virtio_balloon_config, actual,
&actual);
}
static void update_balloon_stats_func(struct work_struct *work)
@ -579,12 +573,10 @@ static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb)
{
if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID,
&vb->config_read_bitmap)) {
virtio_cread(vb->vdev, struct virtio_balloon_config,
free_page_hint_cmd_id,
&vb->cmd_id_received_cache);
/* Legacy balloon config space is LE, unlike all other devices. */
if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
vb->cmd_id_received_cache = le32_to_cpu((__force __le32)vb->cmd_id_received_cache);
virtio_cread_le(vb->vdev, struct virtio_balloon_config,
free_page_hint_cmd_id,
&vb->cmd_id_received_cache);
}
return vb->cmd_id_received_cache;
@ -600,7 +592,7 @@ static int send_cmd_id_start(struct virtio_balloon *vb)
while (virtqueue_get_buf(vq, &unused))
;
vb->cmd_id_active = virtio32_to_cpu(vb->vdev,
vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
virtio_balloon_cmd_id_received(vb));
sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
@ -987,8 +979,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
if (!want_init_on_free())
memset(&poison_val, PAGE_POISON, sizeof(poison_val));
virtio_cwrite(vb->vdev, struct virtio_balloon_config,
poison_val, &poison_val);
virtio_cwrite_le(vb->vdev, struct virtio_balloon_config,
poison_val, &poison_val);
}
vb->pr_dev_info.report = virtballoon_free_page_report;
@ -1129,7 +1121,7 @@ static int virtballoon_validate(struct virtio_device *vdev)
else if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON))
__virtio_clear_bit(vdev, VIRTIO_BALLOON_F_REPORTING);
__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
__virtio_clear_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
return 0;
}

View file

@ -113,9 +113,9 @@ static u8 virtinput_cfg_select(struct virtio_input *vi,
{
u8 size;
virtio_cwrite(vi->vdev, struct virtio_input_config, select, &select);
virtio_cwrite(vi->vdev, struct virtio_input_config, subsel, &subsel);
virtio_cread(vi->vdev, struct virtio_input_config, size, &size);
virtio_cwrite_le(vi->vdev, struct virtio_input_config, select, &select);
virtio_cwrite_le(vi->vdev, struct virtio_input_config, subsel, &subsel);
virtio_cread_le(vi->vdev, struct virtio_input_config, size, &size);
return size;
}
@ -158,11 +158,11 @@ static void virtinput_cfg_abs(struct virtio_input *vi, int abs)
u32 mi, ma, re, fu, fl;
virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ABS_INFO, abs);
virtio_cread(vi->vdev, struct virtio_input_config, u.abs.min, &mi);
virtio_cread(vi->vdev, struct virtio_input_config, u.abs.max, &ma);
virtio_cread(vi->vdev, struct virtio_input_config, u.abs.res, &re);
virtio_cread(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu);
virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl);
virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.min, &mi);
virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.max, &ma);
virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.res, &re);
virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu);
virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.flat, &fl);
input_set_abs_params(vi->idev, abs, mi, ma, fu, fl);
input_abs_set_res(vi->idev, abs, re);
}
@ -244,14 +244,14 @@ static int virtinput_probe(struct virtio_device *vdev)
size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_DEVIDS, 0);
if (size >= sizeof(struct virtio_input_devids)) {
virtio_cread(vi->vdev, struct virtio_input_config,
u.ids.bustype, &vi->idev->id.bustype);
virtio_cread(vi->vdev, struct virtio_input_config,
u.ids.vendor, &vi->idev->id.vendor);
virtio_cread(vi->vdev, struct virtio_input_config,
u.ids.product, &vi->idev->id.product);
virtio_cread(vi->vdev, struct virtio_input_config,
u.ids.version, &vi->idev->id.version);
virtio_cread_le(vi->vdev, struct virtio_input_config,
u.ids.bustype, &vi->idev->id.bustype);
virtio_cread_le(vi->vdev, struct virtio_input_config,
u.ids.vendor, &vi->idev->id.vendor);
virtio_cread_le(vi->vdev, struct virtio_input_config,
u.ids.product, &vi->idev->id.product);
virtio_cread_le(vi->vdev, struct virtio_input_config,
u.ids.version, &vi->idev->id.version);
} else {
vi->idev->id.bustype = BUS_VIRTUAL;
}

View file

@ -1530,21 +1530,21 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm)
uint64_t new_plugged_size, usable_region_size, end_addr;
/* the plugged_size is just a reflection of what _we_ did previously */
virtio_cread(vm->vdev, struct virtio_mem_config, plugged_size,
&new_plugged_size);
virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
&new_plugged_size);
if (WARN_ON_ONCE(new_plugged_size != vm->plugged_size))
vm->plugged_size = new_plugged_size;
/* calculate the last usable memory block id */
virtio_cread(vm->vdev, struct virtio_mem_config,
usable_region_size, &usable_region_size);
virtio_cread_le(vm->vdev, struct virtio_mem_config,
usable_region_size, &usable_region_size);
end_addr = vm->addr + usable_region_size;
end_addr = min(end_addr, phys_limit);
vm->last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1;
/* see if there is a request to change the size */
virtio_cread(vm->vdev, struct virtio_mem_config, requested_size,
&vm->requested_size);
virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size,
&vm->requested_size);
dev_info(&vm->vdev->dev, "plugged size: 0x%llx", vm->plugged_size);
dev_info(&vm->vdev->dev, "requested size: 0x%llx", vm->requested_size);
@ -1677,16 +1677,16 @@ static int virtio_mem_init(struct virtio_mem *vm)
}
/* Fetch all properties that can't change. */
virtio_cread(vm->vdev, struct virtio_mem_config, plugged_size,
&vm->plugged_size);
virtio_cread(vm->vdev, struct virtio_mem_config, block_size,
&vm->device_block_size);
virtio_cread(vm->vdev, struct virtio_mem_config, node_id,
&node_id);
virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
&vm->plugged_size);
virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size,
&vm->device_block_size);
virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id,
&node_id);
vm->nid = virtio_mem_translate_node_id(vm, node_id);
virtio_cread(vm->vdev, struct virtio_mem_config, addr, &vm->addr);
virtio_cread(vm->vdev, struct virtio_mem_config, region_size,
&vm->region_size);
virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr);
virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
&vm->region_size);
/*
* We always hotplug memory in memory block granularity. This way,

View file

@ -481,6 +481,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
* @dev: the pci device
* @cfg_type: the VIRTIO_PCI_CAP_* value we seek
* @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
* @bars: the bitmask of BARs
*
* Returns offset of the capability, or 0.
*/

View file

@ -240,7 +240,7 @@ static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
static bool vring_use_dma_api(struct virtio_device *vdev)
{
if (!virtio_has_iommu_quirk(vdev))
if (!virtio_has_dma_quirk(vdev))
return true;
/* Otherwise, we are left to guess. */
@ -1960,6 +1960,9 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
struct vring_virtqueue *vq = to_vvq(_vq);
if (unlikely(vq->broken))
return false;
virtio_mb(vq->weak_barriers);
return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
virtqueue_poll_split(_vq, last_used_idx);
@ -2225,7 +2228,7 @@ void vring_transport_features(struct virtio_device *vdev)
break;
case VIRTIO_F_VERSION_1:
break;
case VIRTIO_F_IOMMU_PLATFORM:
case VIRTIO_F_ACCESS_PLATFORM:
break;
case VIRTIO_F_RING_PACKED:
break;

View file

@ -57,9 +57,8 @@ static void virtio_vdpa_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
ops->get_config(vdpa, offset, buf, len);
vdpa_get_config(vdpa, offset, buf, len);
}
static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset,
@ -101,9 +100,8 @@ static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status)
static void virtio_vdpa_reset(struct virtio_device *vdev)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
return ops->set_status(vdpa, 0);
vdpa_reset(vdpa);
}
static bool virtio_vdpa_notify(struct virtqueue *vq)
@ -294,12 +292,11 @@ static u64 virtio_vdpa_get_features(struct virtio_device *vdev)
static int virtio_vdpa_finalize_features(struct virtio_device *vdev)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);
return ops->set_features(vdpa, vdev->features);
return vdpa_set_features(vdpa, vdev->features);
}
static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)

View file

@ -606,8 +606,8 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
unsigned int i;
int ret = 0;
virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
&fs->num_request_queues);
virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
&fs->num_request_queues);
if (fs->num_request_queues == 0)
return -EINVAL;

View file

@ -27,18 +27,29 @@ struct vdpa_notification_area {
resource_size_t size;
};
/**
* vDPA vq_state definition
* @avail_index: available index
*/
struct vdpa_vq_state {
u16 avail_index;
};
/**
* vDPA device - representation of a vDPA device
* @dev: underlying device
* @dma_dev: the actual device that is performing DMA
* @config: the configuration ops for this device.
* @index: device index
* @features_valid: were features initialized? for legacy guests
*/
struct vdpa_device {
struct device dev;
struct device *dma_dev;
const struct vdpa_config_ops *config;
unsigned int index;
bool features_valid;
int nvqs;
};
/**
@ -77,16 +88,22 @@ struct vdpa_device {
* @set_vq_state: Set the state for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* @state: virtqueue state (last_avail_idx)
* @state: pointer to set virtqueue state (last_avail_idx)
* Returns integer: success (0) or error (< 0)
* @get_vq_state: Get the state for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* Returns virtqueue state (last_avail_idx)
* @state: pointer to returned state (last_avail_idx)
* @get_vq_notification: Get the notification area for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* Returns the notifcation area
* @get_vq_irq: Get the irq number of a virtqueue (optional,
* but must implemented if require vq irq offloading)
* @vdev: vdpa device
* @idx: virtqueue index
* Returns int: irq number of a virtqueue,
* negative number if no irq assigned.
* @get_vq_align: Get the virtqueue align requirement
* for the device
* @vdev: vdpa device
@ -174,10 +191,14 @@ struct vdpa_config_ops {
struct vdpa_callback *cb);
void (*set_vq_ready)(struct vdpa_device *vdev, u16 idx, bool ready);
bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx);
int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, u64 state);
u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx);
int (*set_vq_state)(struct vdpa_device *vdev, u16 idx,
const struct vdpa_vq_state *state);
int (*get_vq_state)(struct vdpa_device *vdev, u16 idx,
struct vdpa_vq_state *state);
struct vdpa_notification_area
(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
/* vq irq is not expected to be changed once DRIVER_OK is set */
int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx);
/* Device ops */
u32 (*get_vq_align)(struct vdpa_device *vdev);
@ -208,11 +229,12 @@ struct vdpa_config_ops {
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
int nvqs,
size_t size);
#define vdpa_alloc_device(dev_struct, member, parent, config) \
#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs) \
container_of(__vdpa_alloc_device( \
parent, config, \
parent, config, nvqs, \
sizeof(dev_struct) + \
BUILD_BUG_ON_ZERO(offsetof( \
dev_struct, member))), \
@ -266,4 +288,36 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
{
return vdev->dma_dev;
}
static inline void vdpa_reset(struct vdpa_device *vdev)
{
const struct vdpa_config_ops *ops = vdev->config;
vdev->features_valid = false;
ops->set_status(vdev, 0);
}
static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
{
const struct vdpa_config_ops *ops = vdev->config;
vdev->features_valid = true;
return ops->set_features(vdev, features);
}
static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
void *buf, unsigned int len)
{
const struct vdpa_config_ops *ops = vdev->config;
/*
* Config accesses aren't supposed to trigger before features are set.
* If it does happen we assume a legacy guest.
*/
if (!vdev->features_valid)
vdpa_set_features(vdev, 0);
ops->get_config(vdev, offset, buf, len);
}
#endif /* _LINUX_VDPA_H */

View file

@ -11,9 +11,9 @@
#include <linux/types.h>
struct virtio_caif_transf_config {
u16 headroom;
u16 tailroom;
u32 mtu;
__virtio16 headroom;
__virtio16 tailroom;
__virtio32 mtu;
u8 reserved[4];
};

View file

@ -6,6 +6,7 @@
#include <linux/bug.h>
#include <linux/virtio.h>
#include <linux/virtio_byteorder.h>
#include <linux/compiler_types.h>
#include <uapi/linux/virtio_config.h>
struct irq_affinity;
@ -162,16 +163,16 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
}
/**
* virtio_has_iommu_quirk - determine whether this device has the iommu quirk
* virtio_has_dma_quirk - determine whether this device has the DMA quirk
* @vdev: the device
*/
static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
{
/*
* Note the reverse polarity of the quirk feature (compared to most
* other features), this is for compatibility with legacy systems.
*/
return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
}
static inline
@ -287,70 +288,133 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
}
#define virtio_to_cpu(vdev, x) \
_Generic((x), \
__u8: (x), \
__virtio16: virtio16_to_cpu((vdev), (x)), \
__virtio32: virtio32_to_cpu((vdev), (x)), \
__virtio64: virtio64_to_cpu((vdev), (x)) \
)
#define cpu_to_virtio(vdev, x, m) \
_Generic((m), \
__u8: (x), \
__virtio16: cpu_to_virtio16((vdev), (x)), \
__virtio32: cpu_to_virtio32((vdev), (x)), \
__virtio64: cpu_to_virtio64((vdev), (x)) \
)
#define __virtio_native_type(structname, member) \
typeof(virtio_to_cpu(NULL, ((structname*)0)->member))
/* Config space accessors. */
#define virtio_cread(vdev, structname, member, ptr) \
do { \
might_sleep(); \
/* Must match the member's type, and be integer */ \
if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \
(*ptr) = 1; \
typeof(((structname*)0)->member) virtio_cread_v; \
\
switch (sizeof(*ptr)) { \
might_sleep(); \
/* Sanity check: must match the member's type */ \
typecheck(typeof(virtio_to_cpu((vdev), virtio_cread_v)), *(ptr)); \
\
switch (sizeof(virtio_cread_v)) { \
case 1: \
*(ptr) = virtio_cread8(vdev, \
offsetof(structname, member)); \
break; \
case 2: \
*(ptr) = virtio_cread16(vdev, \
offsetof(structname, member)); \
break; \
case 4: \
*(ptr) = virtio_cread32(vdev, \
offsetof(structname, member)); \
break; \
case 8: \
*(ptr) = virtio_cread64(vdev, \
offsetof(structname, member)); \
vdev->config->get((vdev), \
offsetof(structname, member), \
&virtio_cread_v, \
sizeof(virtio_cread_v)); \
break; \
default: \
BUG(); \
__virtio_cread_many((vdev), \
offsetof(structname, member), \
&virtio_cread_v, \
1, \
sizeof(virtio_cread_v)); \
break; \
} \
*(ptr) = virtio_to_cpu(vdev, virtio_cread_v); \
} while(0)
/* Config space accessors. */
#define virtio_cwrite(vdev, structname, member, ptr) \
do { \
might_sleep(); \
/* Must match the member's type, and be integer */ \
if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \
BUG_ON((*ptr) == 1); \
typeof(((structname*)0)->member) virtio_cwrite_v = \
cpu_to_virtio(vdev, *(ptr), ((structname*)0)->member); \
\
switch (sizeof(*ptr)) { \
might_sleep(); \
/* Sanity check: must match the member's type */ \
typecheck(typeof(virtio_to_cpu((vdev), virtio_cwrite_v)), *(ptr)); \
\
vdev->config->set((vdev), offsetof(structname, member), \
&virtio_cwrite_v, \
sizeof(virtio_cwrite_v)); \
} while(0)
/*
* Nothing virtio-specific about these, but let's worry about generalizing
* these later.
*/
#define virtio_le_to_cpu(x) \
_Generic((x), \
__u8: (u8)(x), \
__le16: (u16)le16_to_cpu(x), \
__le32: (u32)le32_to_cpu(x), \
__le64: (u64)le64_to_cpu(x) \
)
#define virtio_cpu_to_le(x, m) \
_Generic((m), \
__u8: (x), \
__le16: cpu_to_le16(x), \
__le32: cpu_to_le32(x), \
__le64: cpu_to_le64(x) \
)
/* LE (e.g. modern) Config space accessors. */
#define virtio_cread_le(vdev, structname, member, ptr) \
do { \
typeof(((structname*)0)->member) virtio_cread_v; \
\
might_sleep(); \
/* Sanity check: must match the member's type */ \
typecheck(typeof(virtio_le_to_cpu(virtio_cread_v)), *(ptr)); \
\
switch (sizeof(virtio_cread_v)) { \
case 1: \
virtio_cwrite8(vdev, \
offsetof(structname, member), \
*(ptr)); \
break; \
case 2: \
virtio_cwrite16(vdev, \
offsetof(structname, member), \
*(ptr)); \
break; \
case 4: \
virtio_cwrite32(vdev, \
offsetof(structname, member), \
*(ptr)); \
break; \
case 8: \
virtio_cwrite64(vdev, \
offsetof(structname, member), \
*(ptr)); \
vdev->config->get((vdev), \
offsetof(structname, member), \
&virtio_cread_v, \
sizeof(virtio_cread_v)); \
break; \
default: \
BUG(); \
__virtio_cread_many((vdev), \
offsetof(structname, member), \
&virtio_cread_v, \
1, \
sizeof(virtio_cread_v)); \
break; \
} \
*(ptr) = virtio_le_to_cpu(virtio_cread_v); \
} while(0)
#define virtio_cwrite_le(vdev, structname, member, ptr) \
do { \
typeof(((structname*)0)->member) virtio_cwrite_v = \
virtio_cpu_to_le(*(ptr), ((structname*)0)->member); \
\
might_sleep(); \
/* Sanity check: must match the member's type */ \
typecheck(typeof(virtio_le_to_cpu(virtio_cwrite_v)), *(ptr)); \
\
vdev->config->set((vdev), offsetof(structname, member), \
&virtio_cwrite_v, \
sizeof(virtio_cwrite_v)); \
} while(0)
/* Read @count fields, @bytes each. */
static inline void __virtio_cread_many(struct virtio_device *vdev,
unsigned int offset,
@ -399,53 +463,60 @@ static inline void virtio_cwrite8(struct virtio_device *vdev,
static inline u16 virtio_cread16(struct virtio_device *vdev,
unsigned int offset)
{
u16 ret;
__virtio16 ret;
might_sleep();
vdev->config->get(vdev, offset, &ret, sizeof(ret));
return virtio16_to_cpu(vdev, (__force __virtio16)ret);
return virtio16_to_cpu(vdev, ret);
}
static inline void virtio_cwrite16(struct virtio_device *vdev,
unsigned int offset, u16 val)
{
__virtio16 v;
might_sleep();
val = (__force u16)cpu_to_virtio16(vdev, val);
vdev->config->set(vdev, offset, &val, sizeof(val));
v = cpu_to_virtio16(vdev, val);
vdev->config->set(vdev, offset, &v, sizeof(v));
}
static inline u32 virtio_cread32(struct virtio_device *vdev,
unsigned int offset)
{
u32 ret;
__virtio32 ret;
might_sleep();
vdev->config->get(vdev, offset, &ret, sizeof(ret));
return virtio32_to_cpu(vdev, (__force __virtio32)ret);
return virtio32_to_cpu(vdev, ret);
}
static inline void virtio_cwrite32(struct virtio_device *vdev,
unsigned int offset, u32 val)
{
__virtio32 v;
might_sleep();
val = (__force u32)cpu_to_virtio32(vdev, val);
vdev->config->set(vdev, offset, &val, sizeof(val));
v = cpu_to_virtio32(vdev, val);
vdev->config->set(vdev, offset, &v, sizeof(v));
}
static inline u64 virtio_cread64(struct virtio_device *vdev,
unsigned int offset)
{
u64 ret;
__virtio64 ret;
__virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret));
return virtio64_to_cpu(vdev, (__force __virtio64)ret);
return virtio64_to_cpu(vdev, ret);
}
static inline void virtio_cwrite64(struct virtio_device *vdev,
unsigned int offset, u64 val)
{
__virtio64 v;
might_sleep();
val = (__force u64)cpu_to_virtio64(vdev, val);
vdev->config->set(vdev, offset, &val, sizeof(val));
v = cpu_to_virtio64(vdev, val);
vdev->config->set(vdev, offset, &v, sizeof(v));
}
/* Conditional config space accessors. */
@ -459,4 +530,14 @@ static inline void virtio_cwrite64(struct virtio_device *vdev,
_r; \
})
/* Conditional config space accessors. */
#define virtio_cread_le_feature(vdev, fbit, structname, member, ptr) \
({ \
int _r = 0; \
if (!virtio_has_feature(vdev, fbit)) \
_r = -ENOENT; \
else \
virtio_cread_le((vdev), structname, member, ptr); \
_r; \
})
#endif /* _LINUX_VIRTIO_CONFIG_H */

View file

@ -46,16 +46,15 @@ static inline void virtio_wmb(bool weak_barriers)
dma_wmb();
}
static inline void virtio_store_mb(bool weak_barriers,
__virtio16 *p, __virtio16 v)
{
if (weak_barriers) {
virt_store_mb(*p, v);
} else {
WRITE_ONCE(*p, v);
mb();
}
}
#define virtio_store_mb(weak_barriers, p, v) \
do { \
if (weak_barriers) { \
virt_store_mb(*p, v); \
} else { \
WRITE_ONCE(*p, v); \
mb(); \
} \
} while (0) \
struct virtio_device;
struct virtqueue;

View file

@ -91,6 +91,8 @@
/* Use message type V2 */
#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
/* IOTLB can accept batching hints */
#define VHOST_BACKEND_F_IOTLB_BATCH 0x2
#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)

View file

@ -60,6 +60,17 @@ struct vhost_iotlb_msg {
#define VHOST_IOTLB_UPDATE 2
#define VHOST_IOTLB_INVALIDATE 3
#define VHOST_IOTLB_ACCESS_FAIL 4
/*
* VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying
* multiple mappings in one go: beginning with
* VHOST_IOTLB_BATCH_BEGIN, followed by any number of
* VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END.
* When one of these two values is used as the message type, the rest
* of the fields in the message are ignored. There's no guarantee that
* these changes take place automatically in the device.
*/
#define VHOST_IOTLB_BATCH_BEGIN 5
#define VHOST_IOTLB_BATCH_END 6
__u8 type;
};

View file

@ -25,7 +25,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
#include <linux/types.h>
#include <linux/virtio_types.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
@ -36,7 +36,7 @@
struct virtio_9p_config {
/* length of the tag name */
__u16 tag_len;
__virtio16 tag_len;
/* non-NULL terminated tag name */
__u8 tag[0];
} __attribute__((packed));

View file

@ -45,20 +45,20 @@
#define VIRTIO_BALLOON_CMD_ID_DONE 1
struct virtio_balloon_config {
/* Number of pages host wants Guest to give up. */
__u32 num_pages;
__le32 num_pages;
/* Number of pages we've actually got in balloon. */
__u32 actual;
__le32 actual;
/*
* Free page hint command id, readonly by guest.
* Was previously named free_page_report_cmd_id so we
* need to carry that name for legacy support.
*/
union {
__u32 free_page_hint_cmd_id;
__u32 free_page_report_cmd_id; /* deprecated */
__le32 free_page_hint_cmd_id;
__le32 free_page_report_cmd_id; /* deprecated */
};
/* Stores PAGE_POISON if page poisoning is in use */
__u32 poison_val;
__le32 poison_val;
};
#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */

View file

@ -57,20 +57,20 @@
struct virtio_blk_config {
/* The capacity (in 512-byte sectors). */
__u64 capacity;
__virtio64 capacity;
/* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */
__u32 size_max;
__virtio32 size_max;
/* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
__u32 seg_max;
__virtio32 seg_max;
/* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */
struct virtio_blk_geometry {
__u16 cylinders;
__virtio16 cylinders;
__u8 heads;
__u8 sectors;
} geometry;
/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
__u32 blk_size;
__virtio32 blk_size;
/* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */
/* exponent for physical block per logical block. */
@ -78,42 +78,42 @@ struct virtio_blk_config {
/* alignment offset in logical blocks. */
__u8 alignment_offset;
/* minimum I/O size without performance penalty in logical blocks. */
__u16 min_io_size;
__virtio16 min_io_size;
/* optimal sustained I/O size in logical blocks. */
__u32 opt_io_size;
__virtio32 opt_io_size;
/* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
__u8 wce;
__u8 unused;
/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
__u16 num_queues;
__virtio16 num_queues;
/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
/*
* The maximum discard sectors (in 512-byte sectors) for
* one segment.
*/
__u32 max_discard_sectors;
__virtio32 max_discard_sectors;
/*
* The maximum number of discard segments in a
* discard command.
*/
__u32 max_discard_seg;
__virtio32 max_discard_seg;
/* Discard commands must be aligned to this number of sectors. */
__u32 discard_sector_alignment;
__virtio32 discard_sector_alignment;
/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
/*
* The maximum number of write zeroes sectors (in 512-byte sectors) in
* one segment.
*/
__u32 max_write_zeroes_sectors;
__virtio32 max_write_zeroes_sectors;
/*
* The maximum number of segments in a write zeroes
* command.
*/
__u32 max_write_zeroes_seg;
__virtio32 max_write_zeroes_seg;
/*
* Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
* deallocation of one or more of the sectors.

View file

@ -67,13 +67,17 @@
#define VIRTIO_F_VERSION_1 32
/*
* If clear - device has the IOMMU bypass quirk feature.
* If set - use platform tools to detect the IOMMU.
* If clear - device has the platform DMA (e.g. IOMMU) bypass quirk feature.
* If set - use platform DMA tools to access the memory.
*
* Note the reverse polarity (compared to most other features),
* this is for compatibility with legacy systems.
*/
#define VIRTIO_F_IOMMU_PLATFORM 33
#define VIRTIO_F_ACCESS_PLATFORM 33
#ifndef __KERNEL__
/* Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace) */
#define VIRTIO_F_IOMMU_PLATFORM VIRTIO_F_ACCESS_PLATFORM
#endif /* __KERNEL__ */
/* This feature indicates support for the packed virtqueue layout. */
#define VIRTIO_F_RING_PACKED 34

View file

@ -45,13 +45,13 @@
struct virtio_console_config {
/* colums of the screens */
__u16 cols;
__virtio16 cols;
/* rows of the screens */
__u16 rows;
__virtio16 rows;
/* max. number of ports this device can hold */
__u32 max_nr_ports;
__virtio32 max_nr_ports;
/* emergency write register */
__u32 emerg_wr;
__virtio32 emerg_wr;
} __attribute__((packed));
/*

View file

@ -414,33 +414,33 @@ struct virtio_crypto_op_data_req {
struct virtio_crypto_config {
/* See VIRTIO_CRYPTO_OP_* above */
__u32 status;
__le32 status;
/*
* Maximum number of data queue
*/
__u32 max_dataqueues;
__le32 max_dataqueues;
/*
* Specifies the services mask which the device support,
* see VIRTIO_CRYPTO_SERVICE_* above
*/
__u32 crypto_services;
__le32 crypto_services;
/* Detailed algorithms mask */
__u32 cipher_algo_l;
__u32 cipher_algo_h;
__u32 hash_algo;
__u32 mac_algo_l;
__u32 mac_algo_h;
__u32 aead_algo;
__le32 cipher_algo_l;
__le32 cipher_algo_h;
__le32 hash_algo;
__le32 mac_algo_l;
__le32 mac_algo_h;
__le32 aead_algo;
/* Maximum length of cipher key */
__u32 max_cipher_key_len;
__le32 max_cipher_key_len;
/* Maximum length of authenticated key */
__u32 max_auth_key_len;
__u32 reserve;
__le32 max_auth_key_len;
__le32 reserve;
/* Maximum size of each crypto request's content */
__u64 max_size;
__le64 max_size;
};
struct virtio_crypto_inhdr {

View file

@ -13,7 +13,7 @@ struct virtio_fs_config {
__u8 tag[36];
/* Number of request queues */
__u32 num_request_queues;
__le32 num_request_queues;
} __attribute__((packed));
#endif /* _UAPI_LINUX_VIRTIO_FS_H */

View file

@ -320,10 +320,10 @@ struct virtio_gpu_resp_edid {
#define VIRTIO_GPU_EVENT_DISPLAY (1 << 0)
struct virtio_gpu_config {
__u32 events_read;
__u32 events_clear;
__u32 num_scanouts;
__u32 num_capsets;
__le32 events_read;
__le32 events_clear;
__le32 num_scanouts;
__le32 num_capsets;
};
/* simple formats for fbcon/X use */

View file

@ -40,18 +40,18 @@ enum virtio_input_config_select {
};
struct virtio_input_absinfo {
__u32 min;
__u32 max;
__u32 fuzz;
__u32 flat;
__u32 res;
__le32 min;
__le32 max;
__le32 fuzz;
__le32 flat;
__le32 res;
};
struct virtio_input_devids {
__u16 bustype;
__u16 vendor;
__u16 product;
__u16 version;
__le16 bustype;
__le16 vendor;
__le16 product;
__le16 version;
};
struct virtio_input_config {

View file

@ -18,24 +18,24 @@
#define VIRTIO_IOMMU_F_MMIO 5
struct virtio_iommu_range_64 {
__u64 start;
__u64 end;
__le64 start;
__le64 end;
};
struct virtio_iommu_range_32 {
__u32 start;
__u32 end;
__le32 start;
__le32 end;
};
struct virtio_iommu_config {
/* Supported page sizes */
__u64 page_size_mask;
__le64 page_size_mask;
/* Supported IOVA range */
struct virtio_iommu_range_64 input_range;
/* Max domain ID size */
struct virtio_iommu_range_32 domain_range;
/* Probe buffer size */
__u32 probe_size;
__le32 probe_size;
};
/* Request types */

View file

@ -185,27 +185,27 @@ struct virtio_mem_resp {
struct virtio_mem_config {
/* Block size and alignment. Cannot change. */
__u64 block_size;
__le64 block_size;
/* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */
__u16 node_id;
__le16 node_id;
__u8 padding[6];
/* Start address of the memory region. Cannot change. */
__u64 addr;
__le64 addr;
/* Region size (maximum). Cannot change. */
__u64 region_size;
__le64 region_size;
/*
* Currently usable region size. Can grow up to region_size. Can
* shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
* update will be sent).
*/
__u64 usable_region_size;
__le64 usable_region_size;
/*
* Currently used size. Changes due to plug/unplug requests, but no
* config updates will be sent.
*/
__u64 plugged_size;
__le64 plugged_size;
/* Requested size. New plug requests cannot exceed it. Can change. */
__u64 requested_size;
__le64 requested_size;
};
#endif /* _LINUX_VIRTIO_MEM_H */

View file

@ -87,19 +87,19 @@ struct virtio_net_config {
/* The config defining mac address (if VIRTIO_NET_F_MAC) */
__u8 mac[ETH_ALEN];
/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
__u16 status;
__virtio16 status;
/* Maximum number of each of transmit and receive queues;
* see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ.
* Legal values are between 1 and 0x8000
*/
__u16 max_virtqueue_pairs;
__virtio16 max_virtqueue_pairs;
/* Default maximum transmit unit advice */
__u16 mtu;
__virtio16 mtu;
/*
* speed, in units of 1Mb. All values 0 to INT_MAX are legal.
* Any other value stands for unknown.
*/
__u32 speed;
__le32 speed;
/*
* 0x00 - half duplex
* 0x01 - full duplex

View file

@ -15,8 +15,8 @@
#include <linux/virtio_config.h>
struct virtio_pmem_config {
__u64 start;
__u64 size;
__le64 start;
__le64 size;
};
#define VIRTIO_PMEM_REQ_TYPE_FLUSH 0

View file

@ -103,16 +103,16 @@ struct virtio_scsi_event {
} __attribute__((packed));
struct virtio_scsi_config {
__u32 num_queues;
__u32 seg_max;
__u32 max_sectors;
__u32 cmd_per_lun;
__u32 event_info_size;
__u32 sense_size;
__u32 cdb_size;
__u16 max_channel;
__u16 max_target;
__u32 max_lun;
__virtio32 num_queues;
__virtio32 seg_max;
__virtio32 max_sectors;
__virtio32 cmd_per_lun;
__virtio32 event_info_size;
__virtio32 sense_size;
__virtio32 cdb_size;
__virtio16 max_channel;
__virtio16 max_target;
__virtio32 max_lun;
} __attribute__((packed));
/* Feature Bits */

View file

@ -42,16 +42,16 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
(__virtio_test_bit((dev), feature))
/**
* virtio_has_iommu_quirk - determine whether this device has the iommu quirk
* virtio_has_dma_quirk - determine whether this device has the DMA quirk
* @vdev: the device
*/
static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
{
/*
* Note the reverse polarity of the quirk feature (compared to most
* other features), this is for compatibility with legacy systems.
*/
return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
}
static inline bool virtio_is_little_endian(struct virtio_device *vdev)

View file

@ -40,17 +40,21 @@ static int __connect(struct irq_bypass_producer *prod,
if (prod->add_consumer)
ret = prod->add_consumer(prod, cons);
if (!ret) {
ret = cons->add_producer(cons, prod);
if (ret && prod->del_consumer)
prod->del_consumer(prod, cons);
}
if (ret)
goto err_add_consumer;
ret = cons->add_producer(cons, prod);
if (ret)
goto err_add_producer;
if (cons->start)
cons->start(cons);
if (prod->start)
prod->start(prod);
err_add_producer:
if (prod->del_consumer)
prod->del_consumer(prod, cons);
err_add_consumer:
return ret;
}