From mboxrd@z Thu Jan 1 00:00:00 1970 From: Daniel Mrzyglod Subject: =?utf-8?q?=5BPATCH_v4_1/2=5D_This_patch_add_support_of?= =?utf-8?q?_mode_5_to_link_bonding_pmd?= Date: Thu, 27 Nov 2014 16:33:40 +0000 Message-ID: <1417106021-22842-2-git-send-email-danielx.t.mrzyglod@intel.com> References: <1417106021-22842-1-git-send-email-danielx.t.mrzyglod@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable To: dev-VfR2kkLFssw@public.gmane.org Return-path: In-Reply-To: <1417106021-22842-1-git-send-email-danielx.t.mrzyglod-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces-VfR2kkLFssw@public.gmane.org Sender: "dev" Add support for mode 5 (Transmit load balancing) into pmd driver This patch adds support for Adaptive transmit load balancing (mode 5) to t= he librte_pmd_bond library. This mode provides an adaptive transmit load=20 balancing. It dynamically changes the transmitting slave, according to th= e=20 computed load.=20 Further details are described here: https://www.kernel.org/doc/Documentation/networking/bonding.txt In this implementation a callback is used for sorting the slave order - providing=20 statistics for the burst function about slave bandwidth usage and sorting the=20 interfaces according to usage. Difference in this implementation vs Linux implementation: - We try to send all packets =E2=80=93 if one interface hasn=E2=80=99t s= ent all of the packets we=20 try to send the rest of the packets through the other slaves, sorted previously by the callb= ack=20 function. Some implementation details: - Every 100 ms the obytes statistics are taken from every slave. 
- Every 10 ms the slaves in the table are re-sorted by the callback, using the=20 remaining bandwidth computed from the bytes successfully transmitted since the previous statistics snapshot, whic= h=20 happens every 100 ms. - There is a callback function which updates these statistics for transparen= cy and because of the rather intensive computation involved in this mode. Signed-off-by: Daniel Mrzyglod --- lib/librte_pmd_bond/rte_eth_bond.h | 6 ++ lib/librte_pmd_bond/rte_eth_bond_args.c | 1 + lib/librte_pmd_bond/rte_eth_bond_pmd.c | 160 +++++++++++++++++++++++= +++++- lib/librte_pmd_bond/rte_eth_bond_private.h | 2 +- 4 files changed, 166 insertions(+), 3 deletions(-) diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte= _eth_bond.h index 085500b..3831f56 100644 --- a/lib/librte_pmd_bond/rte_eth_bond.h +++ b/lib/librte_pmd_bond/rte_eth_bond.h @@ -77,6 +77,12 @@ extern "C" { * In this mode all transmitted packets will be transmitted on all avail= able * active slaves of the bonded. */ #endif +#define BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING (5) +/**< Adaptive TLB (Mode 5) + * This mode provides an adaptive transmit load balancing. It dynamicall= y + * changes the transmitting slave, according to the computed load. 
Stati= stics + * are collected in 100ms intervals and scheduled every 10ms */ + /* Balance Mode Transmit Policies */ #define BALANCE_XMIT_POLICY_LAYER2 (0) /**< Layer 2 (Ethernet MAC) */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bon= d/rte_eth_bond_args.c index d8ce681..2675cf6 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_args.c +++ b/lib/librte_pmd_bond/rte_eth_bond_args.c @@ -173,6 +173,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __= rte_unused, #ifdef RTE_MBUF_REFCNT case BONDING_MODE_BROADCAST: #endif + case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING: return 0; default: RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value); diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond= /rte_eth_bond_pmd.c index cf2fbab..7a5dae6 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c @@ -30,7 +30,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE U= SE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE= . 
*/ - +#include #include #include #include @@ -41,10 +41,15 @@ #include #include #include +#include =20 #include "rte_eth_bond.h" #include "rte_eth_bond_private.h" =20 +#define REORDER_PERIOD_MS 10 +/* Table for statistics in mode 5 TLB */ +static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS]; + static uint16_t bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pk= ts) { @@ -288,6 +293,144 @@ xmit_slave_hash(const struct rte_mbuf *buf, uint8_t= slave_count, uint8_t policy) return hash % slave_count; } =20 +struct bwg_slave { + uint64_t bwg_left_int; + uint64_t bwg_left_remainder; + uint8_t slave; +}; + +static int +bandwidth_cmp(const void *a, const void *b) +{ + const struct bwg_slave *bwg_a =3D a; + const struct bwg_slave *bwg_b =3D b; + int64_t diff =3D (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_lef= t_int; + int64_t diff2 =3D (int64_t)bwg_b->bwg_left_remainder - + (int64_t)bwg_a->bwg_left_remainder; + if (diff > 0) + return 1; + else if (diff < 0) + return -1; + else if (diff2 > 0) + return 1; + else if (diff2 < 0) + return -1; + else + return 0; +} + +static void +bandwidth_left(int port_id, uint64_t load, uint8_t update_idx, + struct bwg_slave *bwg_slave) +{ + struct rte_eth_link link_status; + + rte_eth_link_get(port_id, &link_status); + uint64_t link_bwg =3D link_status.link_speed * 1000000ULL / 8; + if (link_bwg =3D=3D 0) + return; + link_bwg =3D (link_bwg * (update_idx+1) * REORDER_PERIOD_MS); + bwg_slave->bwg_left_int =3D (link_bwg - 1000*load) / link_bwg; + bwg_slave->bwg_left_remainder =3D (link_bwg - 1000*load) % link_bwg; +} + +static void +bond_ethdev_update_tlb_slave_cb(void *arg) +{ + struct bond_dev_private *internals =3D arg; + struct rte_eth_stats slave_stats; + struct bwg_slave bwg_array[RTE_MAX_ETHPORTS]; + uint8_t slave_count; + uint64_t tx_bytes; + + uint8_t update_stats =3D 0; + uint8_t i, slave_id; + + internals->slave_update_idx++; + + + if (internals->slave_update_idx >=3D REORDER_PERIOD_MS) + update_stats =3D 1; 
+ + for (i =3D 0; i < internals->active_slave_count; i++) { + slave_id =3D internals->active_slaves[i]; + rte_eth_stats_get(slave_id, &slave_stats); + tx_bytes =3D slave_stats.obytes - tlb_last_obytets[slave_id]; + bandwidth_left(slave_id, tx_bytes, + internals->slave_update_idx, &bwg_array[i]); + bwg_array[i].slave =3D slave_id; + + if (update_stats) + tlb_last_obytets[slave_id] =3D slave_stats.obytes; + } + + if (update_stats =3D=3D 1) + internals->slave_update_idx =3D 0; + + slave_count =3D i; + qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp); + for (i =3D 0; i < slave_count; i++) + internals->active_slaves[i] =3D bwg_array[i].slave; + + rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slav= e_cb, + (struct bond_dev_private *)internals); +} + +static uint16_t +bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t n= b_pkts) +{ + struct bond_tx_queue *bd_tx_q =3D (struct bond_tx_queue *)queue; + struct bond_dev_private *internals =3D bd_tx_q->dev_private; + + struct rte_eth_dev *primary_port =3D + &rte_eth_devices[internals->primary_port]; + uint16_t num_tx_total =3D 0; + uint8_t i, j; + + uint8_t num_of_slaves =3D internals->active_slave_count; + uint8_t slaves[RTE_MAX_ETHPORTS]; + + struct ether_hdr *ether_hdr; + struct ether_addr primary_slave_addr; + struct ether_addr active_slave_addr; + + if (num_of_slaves < 1) + return num_tx_total; + + memcpy(slaves, internals->active_slaves, + sizeof(internals->active_slaves[0]) * num_of_slaves); + + + ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr); + + if (nb_pkts > 3) { + for (i =3D 0; i < 3; i++) + rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*)); + } + + for (i =3D 0; i < num_of_slaves; i++) { + ether_addr_copy(&internals->slaves[slaves[i]].persisted_mac_addr, + &active_slave_addr); + + for (j =3D num_tx_total; j < nb_pkts; j++) { + if (j + 3 < nb_pkts) + rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*)); + + ether_hdr =3D 
rte_pktmbuf_mtod(bufs[j], struct ether_hdr *); + if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr)) + ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr); + } + + num_tx_total +=3D rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + bufs + num_tx_total, nb_pkts - num_tx_total); + + if (num_tx_total =3D=3D nb_pkts) + break; + } + + return num_tx_total; +} + static uint16_t bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) @@ -500,6 +643,7 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_= eth_dev) } break; case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING: default: for (i =3D 0; i < internals->slave_count; i++) { if (internals->slaves[i].port_id =3D=3D @@ -551,6 +695,10 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, in= t mode) eth_dev->rx_pkt_burst =3D bond_ethdev_rx_burst; break; #endif + case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING: + eth_dev->tx_pkt_burst =3D bond_ethdev_tx_burst_tlb; + eth_dev->rx_pkt_burst =3D bond_ethdev_rx_burst_active_backup; + break; default: return -1; } @@ -676,7 +824,7 @@ slave_add(struct bond_dev_private *internals, } =20 slave_details->link_status_wait_to_complete =3D 0; - + /* clean tlb_last_obytes when adding port for bonding device */ memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_a= ddrs, sizeof(struct ether_addr)); } @@ -762,6 +910,9 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev) if (internals->user_defined_primary_port) bond_ethdev_primary_set(internals, internals->primary_port); =20 + if (internals->mode =3D=3D BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCIN= G) + bond_ethdev_update_tlb_slave_cb(internals); + return 0; } =20 @@ -770,6 +921,9 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev) { struct bond_dev_private *internals =3D eth_dev->data->dev_private; =20 + if (internals->mode =3D=3D BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCIN= G) { + rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals); + 
} internals->active_slave_count =3D 0; internals->link_status_polling_enabled =3D 0; =20 @@ -1016,6 +1170,7 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *= eth_dev) break; /* Promiscuous mode is propagated only to primary slave */ case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING: default: rte_eth_promiscuous_enable(internals->current_primary_port); =20 @@ -1042,6 +1197,7 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev = *dev) break; /* Promiscuous mode is propagated only to primary slave */ case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING: default: rte_eth_promiscuous_disable(internals->current_primary_port); } diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_= bond/rte_eth_bond_private.h index 6254c84..2a4e129 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_private.h +++ b/lib/librte_pmd_bond/rte_eth_bond_private.h @@ -102,7 +102,6 @@ struct bond_slave_details { uint8_t link_status_poll_enabled; uint8_t link_status_wait_to_complete; uint8_t last_link_status; - /**< Port Id of slave eth_dev */ struct ether_addr persisted_mac_addr; }; @@ -145,6 +144,7 @@ struct bond_dev_private { /**< Arary of bonded slaves details */ =20 struct rte_kvargs *kvlist; + uint8_t slave_update_idx; }; =20 extern struct eth_dev_ops default_dev_ops; --=20 2.1.0