From mboxrd@z Thu Jan 1 00:00:00 1970 From: Xiao Wang Subject: [PATCH] net/ifc: add live migration support Date: Mon, 10 Sep 2018 19:01:23 +0800 Message-ID: <20180910110123.138273-1-xiao.w.wang@intel.com> Cc: dev@dpdk.org, xiaolong.ye@intel.com, zhihong.wang@intel.com, Xiao Wang To: tiwei.bie@intel.com Return-path: Received: from mga06.intel.com (mga06.intel.com [134.134.136.31]) by dpdk.org (Postfix) with ESMTP id EF6C51B19 for ; Mon, 10 Sep 2018 13:09:01 +0200 (CEST) List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" IFCVF can help to log dirty page in live migration stage, each queue's index can be read and configured to support VHOST_USER_GET_VRING_BASE and VHOST_USER_SET_VRING_BASE. Signed-off-by: Xiao Wang --- drivers/net/ifc/base/ifcvf.c | 33 +++++++++++++++++++- drivers/net/ifc/base/ifcvf.h | 7 +++++ drivers/net/ifc/ifcvf_vdpa.c | 71 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 108 insertions(+), 3 deletions(-) diff --git a/drivers/net/ifc/base/ifcvf.c b/drivers/net/ifc/base/ifcvf.c index 4b22d9ed1..3c0b2dff6 100644 --- a/drivers/net/ifc/base/ifcvf.c +++ b/drivers/net/ifc/base/ifcvf.c @@ -249,7 +249,7 @@ ifcvf_hw_disable(struct ifcvf_hw *hw) IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector); ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET + (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4); - hw->vring[i].last_avail_idx = (u16)ring_state; + hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); hw->vring[i].last_used_idx = (u16)(ring_state >> 16); } } @@ -278,6 +278,37 @@ ifcvf_stop_hw(struct ifcvf_hw *hw) ifcvf_reset(hw); } +void +ifcvf_enable_logging(struct ifcvf_hw *hw, u64 log_base, u64 log_size) +{ + u8 *lm_cfg; + + lm_cfg = hw->lm_cfg; + + *(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_LOW) = + log_base & IFCVF_32_BIT_MASK; + + *(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_HIGH) = + (log_base >> 32) & IFCVF_32_BIT_MASK; + + *(u32 *)(lm_cfg + IFCVF_LM_END_ADDR_LOW) = + (log_base + log_size) & IFCVF_32_BIT_MASK; + + *(u32 *)(lm_cfg + IFCVF_LM_END_ADDR_HIGH) = + ((log_base + log_size) >> 32) & IFCVF_32_BIT_MASK; + + *(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_ENABLE_VF; +} + +void +ifcvf_disable_logging(struct ifcvf_hw *hw) +{ + u8 *lm_cfg; + + lm_cfg = hw->lm_cfg; + *(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_DISABLE; +} + void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid) { diff --git a/drivers/net/ifc/base/ifcvf.h b/drivers/net/ifc/base/ifcvf.h index badacb615..f026c70ab 100644 --- a/drivers/net/ifc/base/ifcvf.h +++ b/drivers/net/ifc/base/ifcvf.h @@ -49,6 +49,7 @@ #define IFCVF_LM_DISABLE 0x0 #define IFCVF_LM_ENABLE_VF 0x1 #define IFCVF_LM_ENABLE_PF 0x3 +#define IFCVF_LOG_BASE 0x100000000000 #define IFCVF_32_BIT_MASK 0xffffffff @@ -142,6 +143,12 @@ ifcvf_start_hw(struct ifcvf_hw *hw); void ifcvf_stop_hw(struct ifcvf_hw *hw); +void +ifcvf_enable_logging(struct ifcvf_hw *hw, u64 log_base, u64 log_size); + +void +ifcvf_disable_logging(struct ifcvf_hw *hw); + void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid); diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index 88d814037..3c5430dc0 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -276,12 +277,30 @@ vdpa_ifcvf_start(struct ifcvf_internal *internal) return ifcvf_start_hw(&internal->hw); } +static void +ifcvf_used_ring_log(struct ifcvf_hw *hw, uint32_t queue, uint8_t *log_buf) +{ + uint32_t i, size; + uint64_t pfn; + + pfn = hw->vring[queue].used / PAGE_SIZE; + size = hw->vring[queue].size * sizeof(struct vring_used_elem) + + sizeof(__virtio16) * 3; + + for (i = 0; i <= size / PAGE_SIZE; i++) + __sync_fetch_and_or_8(&log_buf[(pfn + i) / 8], + 1 << ((pfn + i) % 8)); +} + static void vdpa_ifcvf_stop(struct ifcvf_internal *internal) { struct ifcvf_hw *hw = &internal->hw; uint32_t i; int vid; + uint64_t features; + uint64_t log_base, log_size; + uint8_t *log_buf; vid = internal->vid; ifcvf_stop_hw(hw); @@ -289,6 +308,21 @@ vdpa_ifcvf_stop(struct ifcvf_internal *internal) for (i = 0; i < hw->nr_vring; i++) rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx, hw->vring[i].last_used_idx); + + rte_vhost_get_negotiated_features(vid, &features); + if (RTE_VHOST_NEED_LOG(features)) { + ifcvf_disable_logging(hw); + rte_vhost_get_log_base(internal->vid, &log_base, &log_size); + rte_vfio_container_dma_unmap(internal->vfio_container_fd, + log_base, IFCVF_LOG_BASE, log_size); + /* + * IFCVF marks dirty memory pages for only packet buffer, + * SW helps to mark the used ring as dirty after device stops. + */ + log_buf = (uint8_t *)(uintptr_t)log_base; + for (i = 0; i < hw->nr_vring; i++) + ifcvf_used_ring_log(hw, i, log_buf); + } } #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ @@ -548,6 +582,35 @@ ifcvf_dev_close(int vid) return 0; } +static int +ifcvf_set_features(int vid) +{ + uint64_t features; + int did; + struct internal_list *list; + struct ifcvf_internal *internal; + uint64_t log_base, log_size; + + did = rte_vhost_get_vdpa_device_id(vid); + list = find_internal_resource_by_did(did); + if (list == NULL) { + DRV_LOG(ERR, "Invalid device id: %d", did); + return -1; + } + + internal = list->internal; + rte_vhost_get_negotiated_features(vid, &features); + + if (RTE_VHOST_NEED_LOG(features)) { + rte_vhost_get_log_base(vid, &log_base, &log_size); + rte_vfio_container_dma_map(internal->vfio_container_fd, + log_base, IFCVF_LOG_BASE, log_size); + ifcvf_enable_logging(&internal->hw, IFCVF_LOG_BASE, log_size); + } + + return 0; +} + static int ifcvf_get_vfio_group_fd(int vid) { @@ -664,7 +727,7 @@ struct rte_vdpa_dev_ops ifcvf_ops = { .dev_conf = ifcvf_dev_config, .dev_close = ifcvf_dev_close, .set_vring_state = NULL, - .set_features = NULL, + .set_features = ifcvf_set_features, .migration_done = NULL, .get_vfio_group_fd = ifcvf_get_vfio_group_fd, .get_vfio_device_fd = ifcvf_get_vfio_device_fd, @@ -699,7 +762,11 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, features = ifcvf_get_features(&internal->hw); internal->features = (features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) | - (1ULL << VHOST_USER_F_PROTOCOL_FEATURES); + (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | + (1ULL << VIRTIO_NET_F_CTRL_VQ) | + (1ULL << VIRTIO_NET_F_STATUS) | + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | + (1ULL << VHOST_F_LOG_ALL); internal->dev_addr.pci_addr = pci_dev->addr; internal->dev_addr.type = PCI_ADDR; -- 2.15.1