* [[Xen-devel] [PATCH net-next v3] xen-netfront: clean up code in xennet_release_rx_bufs
From: Annie Li @ 2014-01-23 1:36 UTC (permalink / raw)
To: xen-devel, netdev
Cc: konrad.wilk, ian.campbell, wei.liu2, david.vrabel, annie.li
From: Annie Li <annie.li@oracle.com>
This patch removes grant transfer code from netfront, and improves ending
grant acess mechanism since gnttab_end_foreign_access_ref may fail when
the grant entry is currently used for reading or writing.
* release grant reference and skb for tx/rx path, use get_page/put_page to
ensure page is released when grant access is completed successfully.
* change corresponding code in xen-blkfront/xen-tpmfront/xen-pcifront because
of code change for put_page in gnttab_end_foreign_access.
* clean up grant transfer code kept from old netfront(2.6.18) which grants
pages for access/map and transfer. But grant transfer is deprecated in current
netfront, so remove corresponding release code for transfer.
V3: Changes as suggestion from David Vrabel, ensure pages are not freed untill
grant acess is ended.
V2: improve patch comments.
Signed-off-by: Annie Li <annie.li@oracle.com>
---
drivers/block/xen-blkfront.c | 25 ++++++++---
drivers/char/tpm/xen-tpmfront.c | 7 +++-
drivers/net/xen-netfront.c | 93 ++++++++++++--------------------------
drivers/pci/xen-pcifront.c | 7 +++-
drivers/xen/grant-table.c | 4 +-
5 files changed, 63 insertions(+), 73 deletions(-)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index c4a4c90..c300bfd 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -918,6 +918,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
struct grant *persistent_gnt;
struct grant *n;
int i, j, segs;
+ struct page *page;
/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&info->io_lock);
@@ -932,13 +933,16 @@ static void blkif_free(struct blkfront_info *info, int suspend)
list_for_each_entry_safe(persistent_gnt, n,
&info->grants, node) {
list_del(&persistent_gnt->node);
+ page = pfn_to_page(persistent_gnt->pfn);
if (persistent_gnt->gref != GRANT_INVALID_REF) {
+ get_page(page);
gnttab_end_foreign_access(persistent_gnt->gref,
- 0, 0UL);
+ 0,
+ (unsigned long)page_address(page));
info->persistent_gnts_c--;
}
if (info->feature_persistent)
- __free_page(pfn_to_page(persistent_gnt->pfn));
+ __free_page(page);
kfree(persistent_gnt);
}
}
@@ -971,9 +975,12 @@ static void blkif_free(struct blkfront_info *info, int suspend)
info->shadow[i].req.u.rw.nr_segments;
for (j = 0; j < segs; j++) {
persistent_gnt = info->shadow[i].grants_used[j];
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+ page = pfn_to_page(persistent_gnt->pfn);
+ get_page(page);
+ gnttab_end_foreign_access(persistent_gnt->gref, 0,
+ (unsigned long)page_address(page));
if (info->feature_persistent)
- __free_page(pfn_to_page(persistent_gnt->pfn));
+ __free_page(page);
kfree(persistent_gnt);
}
@@ -986,8 +993,11 @@ static void blkif_free(struct blkfront_info *info, int suspend)
for (j = 0; j < INDIRECT_GREFS(segs); j++) {
persistent_gnt = info->shadow[i].indirect_grants[j];
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
- __free_page(pfn_to_page(persistent_gnt->pfn));
+ page = pfn_to_page(persistent_gnt->pfn);
+ get_page(page);
+ gnttab_end_foreign_access(persistent_gnt->gref, 0,
+ (unsigned long)page_address(page));
+ __free_page(page);
kfree(persistent_gnt);
}
@@ -1009,8 +1019,11 @@ free_shadow:
/* Free resources associated with old device channel. */
if (info->ring_ref != GRANT_INVALID_REF) {
+ page = virt_to_page(info->ring.sring);
+ get_page(page);
gnttab_end_foreign_access(info->ring_ref, 0,
(unsigned long)info->ring.sring);
+ __free_page(page);
info->ring_ref = GRANT_INVALID_REF;
info->ring.sring = NULL;
}
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index c8ff4df..3e83585 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -312,9 +312,14 @@ static void ring_free(struct tpm_private *priv)
if (!priv)
return;
- if (priv->ring_ref)
+ if (priv->ring_ref) {
+ struct page *page;
+ page = virt_to_page(priv->shr);
+ get_page(page);
gnttab_end_foreign_access(priv->ring_ref, 0,
(unsigned long)priv->shr);
+ __free_page(page);
+ }
else
free_page((unsigned long)priv->shr);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index d7bee8a..a22adaa 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -117,6 +117,7 @@ struct netfront_info {
} tx_skbs[NET_TX_RING_SIZE];
grant_ref_t gref_tx_head;
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
+ struct page *grant_tx_page[NET_TX_RING_SIZE];
unsigned tx_skb_freelist;
spinlock_t rx_lock ____cacheline_aligned_in_smp;
@@ -396,6 +397,7 @@ static void xennet_tx_buf_gc(struct net_device *dev)
gnttab_release_grant_reference(
&np->gref_tx_head, np->grant_tx_ref[id]);
np->grant_tx_ref[id] = GRANT_INVALID_REF;
+ np->grant_tx_page[id] = NULL;
add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
dev_kfree_skb_irq(skb);
}
@@ -452,6 +454,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
mfn, GNTMAP_readonly);
+ np->grant_tx_page[id] = virt_to_page(data);
tx->gref = np->grant_tx_ref[id] = ref;
tx->offset = offset;
tx->size = len;
@@ -497,6 +500,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
np->xbdev->otherend_id,
mfn, GNTMAP_readonly);
+ np->grant_tx_page[id] = page;
tx->gref = np->grant_tx_ref[id] = ref;
tx->offset = offset;
tx->size = bytes;
@@ -596,6 +600,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
mfn = virt_to_mfn(data);
gnttab_grant_foreign_access_ref(
ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
+ np->grant_tx_page[id] = virt_to_page(data);
tx->gref = np->grant_tx_ref[id] = ref;
tx->offset = offset;
tx->size = len;
@@ -1085,10 +1090,11 @@ static void xennet_release_tx_bufs(struct netfront_info *np)
continue;
skb = np->tx_skbs[i].skb;
- gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
- GNTMAP_readonly);
- gnttab_release_grant_reference(&np->gref_tx_head,
- np->grant_tx_ref[i]);
+ get_page(np->grant_tx_page[i]);
+ gnttab_end_foreign_access(np->grant_tx_ref[i],
+ GNTMAP_readonly,
+ (unsigned long)page_address(np->grant_tx_page[i]));
+ np->grant_tx_page[i] = NULL;
np->grant_tx_ref[i] = GRANT_INVALID_REF;
add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
dev_kfree_skb_irq(skb);
@@ -1097,78 +1103,35 @@ static void xennet_release_tx_bufs(struct netfront_info *np)
static void xennet_release_rx_bufs(struct netfront_info *np)
{
- struct mmu_update *mmu = np->rx_mmu;
- struct multicall_entry *mcl = np->rx_mcl;
- struct sk_buff_head free_list;
- struct sk_buff *skb;
- unsigned long mfn;
- int xfer = 0, noxfer = 0, unused = 0;
int id, ref;
- dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n",
- __func__);
- return;
-
- skb_queue_head_init(&free_list);
-
spin_lock_bh(&np->rx_lock);
for (id = 0; id < NET_RX_RING_SIZE; id++) {
- ref = np->grant_rx_ref[id];
- if (ref == GRANT_INVALID_REF) {
- unused++;
- continue;
- }
+ struct sk_buff *skb;
+ struct page *page;
skb = np->rx_skbs[id];
- mfn = gnttab_end_foreign_transfer_ref(ref);
- gnttab_release_grant_reference(&np->gref_rx_head, ref);
- np->grant_rx_ref[id] = GRANT_INVALID_REF;
-
- if (0 == mfn) {
- skb_shinfo(skb)->nr_frags = 0;
- dev_kfree_skb(skb);
- noxfer++;
+ if (!skb)
continue;
- }
-
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /* Remap the page. */
- const struct page *page =
- skb_frag_page(&skb_shinfo(skb)->frags[0]);
- unsigned long pfn = page_to_pfn(page);
- void *vaddr = page_address(page);
- MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
- mfn_pte(mfn, PAGE_KERNEL),
- 0);
- mcl++;
- mmu->ptr = ((u64)mfn << PAGE_SHIFT)
- | MMU_MACHPHYS_UPDATE;
- mmu->val = pfn;
- mmu++;
+ ref = np->grant_rx_ref[id];
+ if (ref == GRANT_INVALID_REF)
+ continue;
- set_phys_to_machine(pfn, mfn);
- }
- __skb_queue_tail(&free_list, skb);
- xfer++;
- }
+ page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
- dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
- __func__, xfer, noxfer, unused);
+ /* gnttab_end_foreign_access() needs a page ref until
+ * foreign access is ended (which may be deferred).
+ */
+ get_page(page);
+ gnttab_end_foreign_access(ref, 0,
+ (unsigned long)page_address(page));
+ np->grant_rx_ref[id] = GRANT_INVALID_REF;
- if (xfer) {
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /* Do all the remapping work and M2P updates. */
- MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
- NULL, DOMID_SELF);
- mcl++;
- HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
- }
+ kfree_skb(skb);
}
- __skb_queue_purge(&free_list);
-
spin_unlock_bh(&np->rx_lock);
}
@@ -1339,6 +1302,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
for (i = 0; i < NET_RX_RING_SIZE; i++) {
np->rx_skbs[i] = NULL;
np->grant_rx_ref[i] = GRANT_INVALID_REF;
+ np->grant_tx_page[i] = NULL;
}
/* A grant for every tx ring slot */
@@ -1439,8 +1403,11 @@ static int netfront_probe(struct xenbus_device *dev,
static void xennet_end_access(int ref, void *page)
{
/* This frees the page as a side-effect */
- if (ref != GRANT_INVALID_REF)
+ if (ref != GRANT_INVALID_REF) {
+ get_page(virt_to_page(page));
gnttab_end_foreign_access(ref, 0, (unsigned long)page);
+ free_page((unsigned long)page);
+ }
}
static void xennet_disconnect_backend(struct netfront_info *info)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index f7197a7..ed732e5 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -756,9 +756,14 @@ static void free_pdev(struct pcifront_device *pdev)
if (pdev->evtchn != INVALID_EVTCHN)
xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
- if (pdev->gnt_ref != INVALID_GRANT_REF)
+ if (pdev->gnt_ref != INVALID_GRANT_REF) {
+ struct page *page;
+ page = virt_to_page(pdev->sh_info);
+ get_page(page);
gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */,
(unsigned long)pdev->sh_info);
+ __free_page(page);
+ }
else
free_page((unsigned long)pdev->sh_info);
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index aa846a4..b64a32e 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -504,7 +504,7 @@ static void gnttab_handle_deferred(unsigned long unused)
if (entry->page) {
pr_debug("freeing g.e. %#x (pfn %#lx)\n",
entry->ref, page_to_pfn(entry->page));
- __free_page(entry->page);
+ put_page(entry->page);
} else
pr_info("freeing g.e. %#x\n", entry->ref);
kfree(entry);
@@ -560,7 +560,7 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
if (gnttab_end_foreign_access_ref(ref, readonly)) {
put_free_entry(ref);
if (page != 0)
- free_page(page);
+ put_page(virt_to_page(page));
} else
gnttab_add_deferred(ref, readonly,
page ? virt_to_page(page) : NULL);
--
1.7.3.4
^ permalink raw reply related
* [PATCH net-next 2/3] sch_netem: change some func's param from "struct Qdisc *" to "struct netem_sched_data *"
From: Yang Yingliang @ 2014-01-23 9:31 UTC (permalink / raw)
To: netdev; +Cc: stephen, davem
In-Reply-To: <1390469499-31952-1-git-send-email-yangyingliang@huawei.com>
In netem_change(), we have already get "struct netem_sched_data *q".
Replace params of get_correlation() and other similar functions with
"struct netem_sched_data *q".
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
net/sched/sch_netem.c | 25 ++++++++++---------------
1 file changed, 10 insertions(+), 15 deletions(-)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c90de21..7e15f08 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -689,9 +689,8 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
return 0;
}
-static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
+static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corr *c = nla_data(attr);
init_crandom(&q->delay_cor, c->delay_corr);
@@ -699,27 +698,24 @@ static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
init_crandom(&q->dup_cor, c->dup_corr);
}
-static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
+static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_reorder *r = nla_data(attr);
q->reorder = r->probability;
init_crandom(&q->reorder_cor, r->correlation);
}
-static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
+static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corrupt *r = nla_data(attr);
q->corrupt = r->probability;
init_crandom(&q->corrupt_cor, r->correlation);
}
-static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
+static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_rate *r = nla_data(attr);
q->rate = r->rate;
@@ -730,9 +726,8 @@ static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
q->cell_overhead = r->cell_overhead;
}
-static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
+static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct nlattr *la;
int rem;
@@ -836,7 +831,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
old_loss_model = q->loss_model;
if (tb[TCA_NETEM_LOSS]) {
- ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
+ ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
if (ret) {
q->loss_model = old_loss_model;
return ret;
@@ -874,16 +869,16 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
q->reorder = ~0;
if (tb[TCA_NETEM_CORR])
- get_correlation(sch, tb[TCA_NETEM_CORR]);
+ get_correlation(q, tb[TCA_NETEM_CORR]);
if (tb[TCA_NETEM_REORDER])
- get_reorder(sch, tb[TCA_NETEM_REORDER]);
+ get_reorder(q, tb[TCA_NETEM_REORDER]);
if (tb[TCA_NETEM_CORRUPT])
- get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
+ get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
if (tb[TCA_NETEM_RATE])
- get_rate(sch, tb[TCA_NETEM_RATE]);
+ get_rate(q, tb[TCA_NETEM_RATE]);
if (tb[TCA_NETEM_RATE64])
q->rate = max_t(u64, q->rate,
--
1.8.0
^ permalink raw reply related
* Re: Linux kernel patch: elide fib_validate_source() completely when possible - bad side effect?
From: Julian Anastasov @ 2014-01-23 9:47 UTC (permalink / raw)
To: Hannes Frederic Sowa; +Cc: David Miller, gregory.hoggarth, netdev
In-Reply-To: <20140123030329.GI7269@order.stressinduktion.org>
Hello,
On Thu, 23 Jan 2014, Hannes Frederic Sowa wrote:
> E.g. we register all local registered broadcast addresses in a structure
> like inet_addr_lst so we only need to check if the packet would leave
> this host with a broadcast hardware address. If the packet is forwarded
> the router must do the same check as only it knows the local broadcast
> addresses. I hope this is correct. ;)
Now when we can override the local table with ip
rules or to prepend route in local table, we can not be
sure that the broadcast routes are returned in all cases,
users can add unicast routes for such addresses.
I don't remember for useful case where one may need to
override broadcast routes but adding such exceptions looks
risky.
Regards
--
Julian Anastasov <ja@ssi.bg>
^ permalink raw reply
* Re: [PATCH 4/4] ipv4: mark nexthop as dead when it's subnet becomes unreachable
From: Julian Anastasov @ 2014-01-23 10:06 UTC (permalink / raw)
To: Sergey Popovich; +Cc: netdev
In-Reply-To: <40044b636dbf7ae5bba5fe2873451e14438ec170.1390304505.git.popovich_sergei@mail.ru>
Hello,
On Tue, 21 Jan 2014, Sergey Popovich wrote:
> + if (nexthop_nh->nh_dev != dev ||
> + nexthop_nh->nh_scope == scope ||
> + (ifa && !inet_ifa_match(nexthop_nh->nh_gw, ifa)))
What if nh_gw is part from another smaller/larger subnet?
For example, what if we still have 10.0.0.200/8 ? 10.0.10.5 is
still reachable, i.e. fib_check_nh() would create such NH.
IMHO, marking NH by exact nh_gw looks more acceptable because
the exact GW becomes unreachable. Otherwise, you will need
fib_lookup() as in fib_check_nh() to check that NH becomes
unreachable.
Regards
--
Julian Anastasov <ja@ssi.bg>
^ permalink raw reply
* Re: [PATCH v2 net-next 05/12] bonding: extend arp_validate to be able to receive unvalidated arp-only traffic
From: Veaceslav Falico @ 2014-01-23 10:25 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: netdev, Andy Gospodarek
In-Reply-To: <32086.1389991289@death.nxdomain>
On Fri, Jan 17, 2014 at 12:41:29PM -0800, Jay Vosburgh wrote:
>Veaceslav Falico <vfalico@redhat.com> wrote:
...snip...
>>--- a/drivers/net/bonding/bonding.h
>>+++ b/drivers/net/bonding/bonding.h
>>@@ -318,6 +318,11 @@ static inline bool bond_is_active_slave(struct slave *slave)
>> #define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP)
>> #define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \
>> BOND_ARP_VALIDATE_BACKUP)
>>+#define BOND_ARP_VALIDATE_ARP (BOND_ARP_VALIDATE_ALL + 1)
>>+#define BOND_ARP_VALIDATE_ACTIVE_ARP (BOND_ARP_VALIDATE_ACTIVE | \
>>+ BOND_ARP_VALIDATE_ARP)
>>+#define BOND_ARP_VALIDATE_BACKUP_ARP (BOND_ARP_VALIDATE_BACKUP | \
>>+ BOND_ARP_VALIDATE_ARP)
>
> If you go with my suggestion to call the new thing "filtering,"
>I'd change these option names, labels, and the function
>"slave_do_arp_validate_only" names. The function name seems kind of
>confusing in particular. I think it'd be clearer to replace the
>"validate" stuff with "filter."
Hi Jay,
Sorry for the delay. Yep, completely agree, I'll change the doc/rename the
functions/defines to use "filtering" instead of double-arp.
I'll re-send it once I'll fix the current bonding state...
Thank you!
>
> -J
>
>> static inline int slave_do_arp_validate(struct bonding *bond,
>> struct slave *slave)
>>@@ -325,6 +330,12 @@ static inline int slave_do_arp_validate(struct bonding *bond,
>> return bond->params.arp_validate & (1 << bond_slave_state(slave));
>> }
>>
>>+static inline int slave_do_arp_validate_only(struct bonding *bond,
>>+ struct slave *slave)
>>+{
>>+ return bond->params.arp_validate & BOND_ARP_VALIDATE_ARP;
>>+}
>>+
>> /* Get the oldest arp which we've received on this slave for bond's
>> * arp_targets.
>> */
>>--
>>1.8.4
>>
>
>---
> -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
>
^ permalink raw reply
* bmc 57810 UP link
From: hallouin.matthieu @ 2014-01-23 10:39 UTC (permalink / raw)
To: netdev
hi,
I'm sorry to disturb you but I'm looking for how to force bmc 57810 UP link if you only connect the RX fiber SFP +?
This in order not to send the frame via the fiber TX.
when I made a ethtool, I still no link.
thank you
^ permalink raw reply
* Re: [[Xen-devel] [PATCH net-next v3] xen-netfront: clean up code in xennet_release_rx_bufs
From: David Vrabel @ 2014-01-23 10:55 UTC (permalink / raw)
To: Annie Li; +Cc: xen-devel, netdev, konrad.wilk, ian.campbell, wei.liu2
In-Reply-To: <1390441011-3816-1-git-send-email-Annie.li@oracle.com>
On 23/01/14 01:36, Annie Li wrote:
> From: Annie Li <annie.li@oracle.com>
>
> This patch removes grant transfer code from netfront, and improves ending
> grant acess mechanism since gnttab_end_foreign_access_ref may fail when
> the grant entry is currently used for reading or writing.
>
> * release grant reference and skb for tx/rx path, use get_page/put_page to
> ensure page is released when grant access is completed successfully.
> * change corresponding code in xen-blkfront/xen-tpmfront/xen-pcifront because
> of code change for put_page in gnttab_end_foreign_access.
> * clean up grant transfer code kept from old netfront(2.6.18) which grants
> pages for access/map and transfer. But grant transfer is deprecated in current
> netfront, so remove corresponding release code for transfer.
>
> V3: Changes as suggestion from David Vrabel, ensure pages are not freed untill
> grant acess is ended.
>
> V2: improve patch comments.
> Signed-off-by: Annie Li <annie.li@oracle.com>
> ---
> drivers/block/xen-blkfront.c | 25 ++++++++---
> drivers/char/tpm/xen-tpmfront.c | 7 +++-
> drivers/net/xen-netfront.c | 93 ++++++++++++--------------------------
> drivers/pci/xen-pcifront.c | 7 +++-
> drivers/xen/grant-table.c | 4 +-
> 5 files changed, 63 insertions(+), 73 deletions(-)
I don't understand why you've made all these unnecessary changes to the
other frontends and grant-table.c.
The xen-netfront.c changes are fine on their own.
David
^ permalink raw reply
* [PATCH net-next 0/2] bonding: fix locking in bond_ab_arp_probe
From: Veaceslav Falico @ 2014-01-23 11:16 UTC (permalink / raw)
To: netdev; +Cc: Jay Vosburgh, Andy Gospodarek, Veaceslav Falico
Hi,
After the latest patches, on every call of bond_ab_arp_probe() without an
active slave I see the following warning:
[ 7.912314] RTNL: assertion failed at net/core/dev.c (4494)
...
[ 7.922495] [<ffffffff817acc6f>] dump_stack+0x51/0x72
[ 7.923714] [<ffffffff8168795e>] netdev_master_upper_dev_get+0x6e/0x70
[ 7.924940] [<ffffffff816a2a66>] rtnl_link_fill+0x116/0x260
[ 7.926143] [<ffffffff817acc6f>] ? dump_stack+0x51/0x72
[ 7.927333] [<ffffffff816a350c>] rtnl_fill_ifinfo+0x95c/0xb90
[ 7.928529] [<ffffffff8167af2b>] ? __kmalloc_reserve+0x3b/0xa0
[ 7.929681] [<ffffffff8167bfcf>] ? __alloc_skb+0x9f/0x1e0
[ 7.930827] [<ffffffff816a3b64>] rtmsg_ifinfo+0x84/0x100
[ 7.931960] [<ffffffffa00bca07>] bond_ab_arp_probe+0x1a7/0x370 [bonding]
[ 7.933133] [<ffffffffa00bcd78>] bond_activebackup_arp_mon+0x1a8/0x2f0 [bonding]
...
It happens because in bond_ab_arp_probe() we change the flags of a slave
without holding the RTNL lock.
To fix this - remove the useless curr_active_lock, RCUify it completely and
lock RTNL while changing the slave's flags.
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: netdev@vger.kernel.org
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_main.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
^ permalink raw reply
* [PATCH net-next 1/2] bonding: RCUify bond_ab_arp_probe
From: Veaceslav Falico @ 2014-01-23 11:16 UTC (permalink / raw)
To: netdev; +Cc: Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1390475764-7675-1-git-send-email-vfalico@redhat.com>
Currently bond_ab_arp_probe() is always called under rcu_read_lock(),
however to work with curr_active_slave we're still holding the
curr_slave_lock.
To remove that curr_slave_lock - rcu_dereference the bond's
curr_active_slave and use it further - so that we're sure the slave won't
go away, and we don't care if it will change in the meanwhile.
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_main.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f9e0c8b..22d8b69 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2605,25 +2605,21 @@ do_failover:
static void bond_ab_arp_probe(struct bonding *bond)
{
struct slave *slave, *before = NULL, *new_slave = NULL,
- *curr_arp_slave = rcu_dereference(bond->current_arp_slave);
+ *curr_arp_slave = rcu_dereference(bond->current_arp_slave),
+ *curr_active_slave = rcu_dereference(bond->curr_active_slave);
struct list_head *iter;
bool found = false;
- read_lock(&bond->curr_slave_lock);
-
- if (curr_arp_slave && bond->curr_active_slave)
+ if (curr_arp_slave && curr_active_slave)
pr_info("PROBE: c_arp %s && cas %s BAD\n",
curr_arp_slave->dev->name,
- bond->curr_active_slave->dev->name);
+ curr_active_slave->dev->name);
- if (bond->curr_active_slave) {
- bond_arp_send_all(bond, bond->curr_active_slave);
- read_unlock(&bond->curr_slave_lock);
+ if (curr_active_slave) {
+ bond_arp_send_all(bond, curr_active_slave);
return;
}
- read_unlock(&bond->curr_slave_lock);
-
/* if we don't have a curr_active_slave, search for the next available
* backup slave from the current_arp_slave and make it the candidate
* for becoming the curr_active_slave
--
1.8.4
^ permalink raw reply related
* [PATCH net-next 2/2] bonding: lock RTNL when setting (in)active slave flags
From: Veaceslav Falico @ 2014-01-23 11:16 UTC (permalink / raw)
To: netdev; +Cc: Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1390475764-7675-1-git-send-email-vfalico@redhat.com>
Currently, on (in)active slave flag change, we notify the stack via
rtmsg_ifinfo(), which implies that we should hold the RTNL lock.
However, in bond_ab_arp_probe(), in case we don't have curr_active_slave -
we don't hold it, which issues a warning and might race with other slave
flags modifications.
Fix this by wrapping the changing in RTNL lock - it's not a hot path (runs
every arp_interval) - so no speed issues should arrive.
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_main.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 22d8b69..50cddb9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2631,9 +2631,11 @@ static void bond_ab_arp_probe(struct bonding *bond)
return;
}
+ rtnl_lock();
+
bond_set_slave_inactive_flags(curr_arp_slave);
- bond_for_each_slave_rcu(bond, slave, iter) {
+ bond_for_each_slave(bond, slave, iter) {
if (!found && !before && IS_UP(slave->dev))
before = slave;
@@ -2660,6 +2662,8 @@ static void bond_ab_arp_probe(struct bonding *bond)
found = true;
}
+ rtnl_unlock();
+
if (!new_slave && before)
new_slave = before;
--
1.8.4
^ permalink raw reply related
* RE: [PATCH net] bnx2x: Fix VF flr flow
From: Yuval Mintz @ 2014-01-23 11:26 UTC (permalink / raw)
To: David Miller; +Cc: netdev@vger.kernel.org, Ariel Elior
In-Reply-To: <20140121.231326.564835234426557051.davem@davemloft.net>
> > When a VF originating from a given PF is flr-ed, that PF gets an interrupt
> > from the chip management and takes a part in the flr process.
> >
> > This patch fixes several corner cases in which the driver performs its part
> > of the flr flow out-of-order, causing the FW to assert due to badly timed
> > messages received from the driver.
> >
> > Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
> > Signed-off-by: Ariel Elior <ariele@broadcom.com>
>
> Applied.
Hi Dave,
Pulled `net' but I still can't see the fix there.
Thanks,
Yuval
^ permalink raw reply
* Re: [PATCH net-next 0/2] bonding: fix locking in bond_ab_arp_probe
From: Veaceslav Falico @ 2014-01-23 11:25 UTC (permalink / raw)
To: netdev; +Cc: Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1390475764-7675-1-git-send-email-vfalico@redhat.com>
On Thu, Jan 23, 2014 at 12:16:02PM +0100, Veaceslav Falico wrote:
>Hi,
>
>After the latest patches, on every call of bond_ab_arp_probe() without an
>active slave I see the following warning:
Self-NAK, there are still warnings comming out from bond_ab_arp_probe and
from other parts. Calling rtnl-needed functions on that low-level functions
wasn't the best idea...
Will send v2 to fix this warning.
>
>[ 7.912314] RTNL: assertion failed at net/core/dev.c (4494)
>...
>[ 7.922495] [<ffffffff817acc6f>] dump_stack+0x51/0x72
>[ 7.923714] [<ffffffff8168795e>] netdev_master_upper_dev_get+0x6e/0x70
>[ 7.924940] [<ffffffff816a2a66>] rtnl_link_fill+0x116/0x260
>[ 7.926143] [<ffffffff817acc6f>] ? dump_stack+0x51/0x72
>[ 7.927333] [<ffffffff816a350c>] rtnl_fill_ifinfo+0x95c/0xb90
>[ 7.928529] [<ffffffff8167af2b>] ? __kmalloc_reserve+0x3b/0xa0
>[ 7.929681] [<ffffffff8167bfcf>] ? __alloc_skb+0x9f/0x1e0
>[ 7.930827] [<ffffffff816a3b64>] rtmsg_ifinfo+0x84/0x100
>[ 7.931960] [<ffffffffa00bca07>] bond_ab_arp_probe+0x1a7/0x370 [bonding]
>[ 7.933133] [<ffffffffa00bcd78>] bond_activebackup_arp_mon+0x1a8/0x2f0 [bonding]
>...
>
>It happens because in bond_ab_arp_probe() we change the flags of a slave
>without holding the RTNL lock.
>
>To fix this - remove the useless curr_active_lock, RCUify it completely and
>lock RTNL while changing the slave's flags.
>
>CC: Jay Vosburgh <fubar@us.ibm.com>
>CC: Andy Gospodarek <andy@greyhouse.net>
>CC: netdev@vger.kernel.org
>Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
>
>---
> drivers/net/bonding/bond_main.c | 22 +++++++++++-----------
> 1 file changed, 11 insertions(+), 11 deletions(-)
>
^ permalink raw reply
* Re: [PATCH stable 3.7+] fib_frontend: fix possible NULL pointer dereference
From: Eric Dumazet @ 2014-01-23 12:26 UTC (permalink / raw)
To: Oliver Hartkopp; +Cc: David Miller, Linux Netdev List
In-Reply-To: <52E0DEA6.3060803@hartkopp.net>
On Thu, 2014-01-23 at 10:19 +0100, Oliver Hartkopp wrote:
> The two commits 0115e8e30d (net: remove delay at device dismantle) and
> 748e2d9396a (net: reinstate rtnl in call_netdevice_notifiers()) silently
> removed a NULL pointer check for in_dev since Linux 3.7.
>
> This patch re-introduces this check as it causes crashing the kernel when
> setting small mtu values on non-ip capable netdevices.
>
> Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
>
> ---
Thanks for the fix !
Acked-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply
* Re: [PATCH v2] bridge: Fix crash with vlan filtering and tcpdump
From: Toshiaki Makita @ 2014-01-23 12:38 UTC (permalink / raw)
To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <1390415070-1884-1-git-send-email-vyasevic@redhat.com>
On Wed, 2014-01-22 at 13:24 -0500, Vlad Yasevich wrote:
> When the vlan filtering is enabled on the bridge, but
> the filter is not configured on the bridge device itself,
> running tcpdump on the bridge device will result in a
> an Oops with NULL pointer dereference. The reason
> is that br_pass_frame_up() will bypass the vlan
> check because promisc flag is set. It will then try
> to get the table pointer and process the packet based
> on the table. Since the table pointer is NULL, we oops.
> Catch this special condition in br_handle_vlan().
>
> Reported-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> CC: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
> Changes since v1:
> - Do not use a BUG or BUG_ON as it is possible to trigger the
> false BUG condition when thrashing the vlan_enable toggle.
> Instead just drop the skb.
Maybe that frame went through should_deliver() while vlan_filtering was
disabled?
I hope it doesn't affect other codes in an adverse way...
>
> net/bridge/br_input.c | 11 ++++++-----
> net/bridge/br_vlan.c | 14 ++++++++++++++
> 2 files changed, 20 insertions(+), 5 deletions(-)
>
...
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index af5ebd1..abc841c 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -144,6 +144,20 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
> if (!br->vlan_enabled)
> goto out;
>
> + /* Vlan filter table must be configured at this point. The
> + * only exception is the bridge is set in promisc mode and the
> + * packet is destined for the bridge device. In this case
> + * pass the packet as is.
> + */
> + if (!pv) {
> + if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev)
> + goto out;
> + else {
> + kfree_skb_list(skb);
Why is this not kfree_skb() but kfree_skb_list()?
I haven't seen kfree_skb_list() in the bridge code.
Thanks,
Toshiaki Makita
> + return NULL;
> + }
> + }
> +
> /* At this point, we know that the frame was filtered and contains
> * a valid vlan id. If the vlan id is set in the untagged bitmap,
> * send untagged; otherwise, send taged.
^ permalink raw reply
* Re: [Xen-devel] [PATCH v3] xen/grant-table: Avoid m2p_override during mapping
From: Zoltan Kiss @ 2014-01-23 13:05 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk, ian.campbell, wei.liu2, xen-devel, netdev,
linux-kernel, jonathan.davies
In-Reply-To: <a6736948-c67e-4509-89a8-42ec9693830f@email.android.com>
On 20/01/14 21:22, Konrad Rzeszutek Wilk wrote:
> Zoltan Kiss <zoltan.kiss@citrix.com> wrote:
>> The grant mapping API does m2p_override unnecessarily: only gntdev
>> needs it,
>> for blkback and future netback patches it just cause a lock contention,
>> as
>> those pages never go to userspace. Therefore this series does the
>> following:
>> - the original functions were renamed to __gnttab_[un]map_refs, with a
>> new
>> parameter m2p_override
>> - based on m2p_override either they follow the original behaviour, or
>> just set
>> the private flag and call set_phys_to_machine
>> - gnttab_[un]map_refs are now a wrapper to call __gnttab_[un]map_refs
>> with
>> m2p_override false
>> - a new function gnttab_[un]map_refs_userspace provides the old
>> behaviour
>
> You don't say anything about the 'return ret' changed to 'return 0'.
>
> Any particular reason for that?
That's the only possible return value there, so it just makes it more
obvious. I'll add a description about that.
Zoli
^ permalink raw reply
* [PATCH v5] xen/grant-table: Avoid m2p_override during mapping
From: Zoltan Kiss @ 2014-01-23 13:07 UTC (permalink / raw)
To: ian.campbell, wei.liu2, xen-devel, netdev, linux-kernel,
jonathan.davies
Cc: Zoltan Kiss
The grant mapping API does m2p_override unnecessarily: only gntdev needs it,
for blkback and future netback patches it just cause a lock contention, as
those pages never go to userspace. Therefore this series does the following:
- the original functions were renamed to __gnttab_[un]map_refs, with a new
parameter m2p_override
- based on m2p_override either they follow the original behaviour, or just set
the private flag and call set_phys_to_machine
- gnttab_[un]map_refs are now a wrapper to call __gnttab_[un]map_refs with
m2p_override false
- a new function gnttab_[un]map_refs_userspace provides the old behaviour
It also removes a stray space from page.h and change ret to 0 if
XENFEAT_auto_translated_physmap, as that is the only possible return value
there.
v2:
- move the storing of the old mfn in page->index to gnttab_map_refs
- move the function header update to a separate patch
v3:
- a new approach to retain old behaviour where it needed
- squash the patches into one
v4:
- move out the common bits from m2p* functions, and pass pfn/mfn as parameter
- clear page->private before doing anything with the page, so m2p_find_override
won't race with this
v5:
- change return value handling in __gnttab_[un]map_refs
- remove a stray space in page.h
- add detail why ret = 0 now at some places
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Suggested-by: David Vrabel <david.vrabel@citrix.com>
---
arch/x86/include/asm/xen/page.h | 12 +++--
arch/x86/xen/p2m.c | 25 ++--------
drivers/block/xen-blkback/blkback.c | 15 +++---
drivers/xen/gntdev.c | 13 +++--
drivers/xen/grant-table.c | 90 ++++++++++++++++++++++++++++++-----
include/xen/grant_table.h | 8 +++-
6 files changed, 109 insertions(+), 54 deletions(-)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index b913915..68a1438 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -49,10 +49,14 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
-extern int m2p_add_override(unsigned long mfn, struct page *page,
- struct gnttab_map_grant_ref *kmap_op);
+extern int m2p_add_override(unsigned long mfn,
+ struct page *page,
+ struct gnttab_map_grant_ref *kmap_op,
+ unsigned long pfn);
extern int m2p_remove_override(struct page *page,
- struct gnttab_map_grant_ref *kmap_op);
+ struct gnttab_map_grant_ref *kmap_op,
+ unsigned long pfn,
+ unsigned long mfn);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
@@ -121,7 +125,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
pfn = m2p_find_override_pfn(mfn, ~0);
}
- /*
+ /*
* pfn is ~0 if there are no entries in the m2p for mfn or if the
* entry doesn't map back to the mfn and m2p_override doesn't have a
* valid entry for it.
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 2ae8699..0060178 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -872,15 +872,13 @@ static unsigned long mfn_hash(unsigned long mfn)
/* Add an MFN override for a particular page */
int m2p_add_override(unsigned long mfn, struct page *page,
- struct gnttab_map_grant_ref *kmap_op)
+ struct gnttab_map_grant_ref *kmap_op, unsigned long pfn)
{
unsigned long flags;
- unsigned long pfn;
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
- pfn = page_to_pfn(page);
if (!PageHighMem(page)) {
address = (unsigned long)__va(pfn << PAGE_SHIFT);
ptep = lookup_address(address, &level);
@@ -888,13 +886,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
"m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL;
}
- WARN_ON(PagePrivate(page));
- SetPagePrivate(page);
- set_page_private(page, mfn);
- page->index = pfn_to_mfn(pfn);
-
- if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
- return -ENOMEM;
if (kmap_op != NULL) {
if (!PageHighMem(page)) {
@@ -933,20 +924,15 @@ int m2p_add_override(unsigned long mfn, struct page *page,
}
EXPORT_SYMBOL_GPL(m2p_add_override);
int m2p_remove_override(struct page *page,
- struct gnttab_map_grant_ref *kmap_op)
+ struct gnttab_map_grant_ref *kmap_op,
+ unsigned long pfn,
+ unsigned long mfn)
{
unsigned long flags;
- unsigned long mfn;
- unsigned long pfn;
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
- pfn = page_to_pfn(page);
- mfn = get_phys_to_machine(pfn);
- if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
- return -EINVAL;
-
if (!PageHighMem(page)) {
address = (unsigned long)__va(pfn << PAGE_SHIFT);
ptep = lookup_address(address, &level);
@@ -959,10 +945,7 @@ int m2p_remove_override(struct page *page,
spin_lock_irqsave(&m2p_override_lock, flags);
list_del(&page->lru);
spin_unlock_irqrestore(&m2p_override_lock, flags);
- WARN_ON(!PagePrivate(page));
- ClearPagePrivate(page);
- set_phys_to_machine(pfn, page->index);
if (kmap_op != NULL) {
if (!PageHighMem(page)) {
struct multicall_space mcs;
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 6620b73..875025f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -285,8 +285,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
!rb_next(&persistent_gnt->node)) {
- ret = gnttab_unmap_refs(unmap, NULL, pages,
- segs_to_unmap);
+ ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap);
BUG_ON(ret);
put_free_pages(blkif, pages, segs_to_unmap);
segs_to_unmap = 0;
@@ -321,8 +320,7 @@ static void unmap_purged_grants(struct work_struct *work)
pages[segs_to_unmap] = persistent_gnt->page;
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
- ret = gnttab_unmap_refs(unmap, NULL, pages,
- segs_to_unmap);
+ ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap);
BUG_ON(ret);
put_free_pages(blkif, pages, segs_to_unmap);
segs_to_unmap = 0;
@@ -330,7 +328,7 @@ static void unmap_purged_grants(struct work_struct *work)
kfree(persistent_gnt);
}
if (segs_to_unmap > 0) {
- ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
+ ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap);
BUG_ON(ret);
put_free_pages(blkif, pages, segs_to_unmap);
}
@@ -670,15 +668,14 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
GNTMAP_host_map, pages[i]->handle);
pages[i]->handle = BLKBACK_INVALID_HANDLE;
if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
- ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
- invcount);
+ ret = gnttab_unmap_refs(unmap, unmap_pages, invcount);
BUG_ON(ret);
put_free_pages(blkif, unmap_pages, invcount);
invcount = 0;
}
}
if (invcount) {
- ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
+ ret = gnttab_unmap_refs(unmap, unmap_pages, invcount);
BUG_ON(ret);
put_free_pages(blkif, unmap_pages, invcount);
}
@@ -740,7 +737,7 @@ again:
}
if (segs_to_map) {
- ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+ ret = gnttab_map_refs(map, pages_to_gnt, segs_to_map);
BUG_ON(ret);
}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e41c79c..e652c0e 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -284,8 +284,10 @@ static int map_grant_pages(struct grant_map *map)
}
pr_debug("map %d+%d\n", map->index, map->count);
- err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
- map->pages, map->count);
+ err = gnttab_map_refs_userspace(map->map_ops,
+ use_ptemod ? map->kmap_ops : NULL,
+ map->pages,
+ map->count);
if (err)
return err;
@@ -315,9 +317,10 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
}
}
- err = gnttab_unmap_refs(map->unmap_ops + offset,
- use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset,
- pages);
+ err = gnttab_unmap_refs_userspace(map->unmap_ops + offset,
+ use_ptemod ? map->kmap_ops + offset : NULL,
+ map->pages + offset,
+ pages);
if (err)
return err;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index aa846a4..2add483 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -880,15 +880,17 @@ void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
}
EXPORT_SYMBOL_GPL(gnttab_batch_copy);
-int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
- struct page **pages, unsigned int count)
+ struct page **pages, unsigned int count,
+ bool m2p_override)
{
int i, ret;
bool lazy = false;
pte_t *pte;
- unsigned long mfn;
+ unsigned long mfn, pfn;
+ BUG_ON(kmap_ops && !m2p_override);
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
if (ret)
return ret;
@@ -907,10 +909,12 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT,
map_ops[i].dev_bus_addr >> PAGE_SHIFT);
}
- return ret;
+ return 0;
}
- if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+ if (m2p_override &&
+ !in_interrupt() &&
+ paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
arch_enter_lazy_mmu_mode();
lazy = true;
}
@@ -927,8 +931,20 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
} else {
mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
}
- ret = m2p_add_override(mfn, pages[i], kmap_ops ?
- &kmap_ops[i] : NULL);
+ pfn = page_to_pfn(pages[i]);
+
+ WARN_ON(PagePrivate(pages[i]));
+ SetPagePrivate(pages[i]);
+ set_page_private(pages[i], mfn);
+
+ pages[i]->index = pfn_to_mfn(pfn);
+ if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (m2p_override)
+ ret = m2p_add_override(mfn, pages[i], kmap_ops ?
+ &kmap_ops[i] : NULL, pfn);
if (ret)
goto out;
}
@@ -939,15 +955,32 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
return ret;
}
+
+int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+ struct page **pages, unsigned int count)
+{
+ return __gnttab_map_refs(map_ops, NULL, pages, count, false);
+}
EXPORT_SYMBOL_GPL(gnttab_map_refs);
-int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
+int gnttab_map_refs_userspace(struct gnttab_map_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count)
+{
+ return __gnttab_map_refs(map_ops, kmap_ops, pages, count, true);
+}
+EXPORT_SYMBOL_GPL(gnttab_map_refs_userspace);
+
+int __gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
struct gnttab_map_grant_ref *kmap_ops,
- struct page **pages, unsigned int count)
+ struct page **pages, unsigned int count,
+ bool m2p_override)
{
int i, ret;
bool lazy = false;
+ unsigned long pfn, mfn;
+ BUG_ON(kmap_ops && !m2p_override);
ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
if (ret)
return ret;
@@ -958,17 +991,34 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT,
INVALID_P2M_ENTRY);
}
- return ret;
+ return 0;
}
- if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+ if (m2p_override &&
+ !in_interrupt() &&
+ paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
arch_enter_lazy_mmu_mode();
lazy = true;
}
for (i = 0; i < count; i++) {
- ret = m2p_remove_override(pages[i], kmap_ops ?
- &kmap_ops[i] : NULL);
+ pfn = page_to_pfn(pages[i]);
+ mfn = get_phys_to_machine(pfn);
+ if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ set_page_private(pages[i], INVALID_P2M_ENTRY);
+ WARN_ON(!PagePrivate(pages[i]));
+ ClearPagePrivate(pages[i]);
+ set_phys_to_machine(pfn, pages[i]->index);
+ if (m2p_override)
+ ret = m2p_remove_override(pages[i],
+ kmap_ops ?
+ &kmap_ops[i] : NULL,
+ pfn,
+ mfn);
if (ret)
goto out;
}
@@ -979,8 +1029,22 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
return ret;
}
+
+int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *map_ops,
+ struct page **pages, unsigned int count)
+{
+ return __gnttab_unmap_refs(map_ops, NULL, pages, count, false);
+}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
+int gnttab_unmap_refs_userspace(struct gnttab_unmap_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count)
+{
+ return __gnttab_unmap_refs(map_ops, kmap_ops, pages, count, true);
+}
+EXPORT_SYMBOL_GPL(gnttab_unmap_refs_userspace);
+
static unsigned nr_status_frames(unsigned nr_grant_frames)
{
BUG_ON(grefs_per_grant_frame == 0);
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 694dcaf..9a919b1 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -184,11 +184,15 @@ unsigned int gnttab_max_grant_frames(void);
#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
- struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count);
+int gnttab_map_refs_userspace(struct gnttab_map_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count);
int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
- struct gnttab_map_grant_ref *kunmap_ops,
struct page **pages, unsigned int count);
+int gnttab_unmap_refs_userspace(struct gnttab_unmap_grant_ref *unmap_ops,
+ struct gnttab_map_grant_ref *kunmap_ops,
+ struct page **pages, unsigned int count);
/* Perform a batch of grant map/copy operations. Retry every batch slot
* for which the hypervisor returns GNTST_eagain. This is typically due
^ permalink raw reply related
* [PATCH v2 net-next 0/2] bonding: fix locking in bond_ab_arp_probe
From: Veaceslav Falico @ 2014-01-23 13:08 UTC (permalink / raw)
To: netdev; +Cc: Jay Vosburgh, Andy Gospodarek, Veaceslav Falico
Hi,
After the latest patches, on every call of bond_ab_arp_probe() without an
active slave I see the following warning:
[ 7.912314] RTNL: assertion failed at net/core/dev.c (4494)
...
[ 7.922495] [<ffffffff817acc6f>] dump_stack+0x51/0x72
[ 7.923714] [<ffffffff8168795e>] netdev_master_upper_dev_get+0x6e/0x70
[ 7.924940] [<ffffffff816a2a66>] rtnl_link_fill+0x116/0x260
[ 7.926143] [<ffffffff817acc6f>] ? dump_stack+0x51/0x72
[ 7.927333] [<ffffffff816a350c>] rtnl_fill_ifinfo+0x95c/0xb90
[ 7.928529] [<ffffffff8167af2b>] ? __kmalloc_reserve+0x3b/0xa0
[ 7.929681] [<ffffffff8167bfcf>] ? __alloc_skb+0x9f/0x1e0
[ 7.930827] [<ffffffff816a3b64>] rtmsg_ifinfo+0x84/0x100
[ 7.931960] [<ffffffffa00bca07>] bond_ab_arp_probe+0x1a7/0x370 [bonding]
[ 7.933133] [<ffffffffa00bcd78>] bond_activebackup_arp_mon+0x1a8/0x2f0 [bonding]
...
It happens because in bond_ab_arp_probe() we change the flags of a slave
without holding the RTNL lock.
To fix this - remove the useless curr_active_lock, RCUify it and lock RTNL
while changing the slave's flags. Also, remove bond_ab_arp_probe() from
under any locks in bond_ab_arp_mon().
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: netdev@vger.kernel.org
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_main.c | 48 +++++++++++++++++++++++------------------
1 file changed, 27 insertions(+), 21 deletions(-)
^ permalink raw reply
* [PATCH v2 net-next 1/2] bonding: RCUify bond_ab_arp_probe
From: Veaceslav Falico @ 2014-01-23 13:08 UTC (permalink / raw)
To: netdev; +Cc: Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1390482511-14884-1-git-send-email-vfalico@redhat.com>
Currently bond_ab_arp_probe() is always called under rcu_read_lock(),
however to work with curr_active_slave we're still holding the
curr_slave_lock.
To remove that curr_slave_lock - rcu_dereference the bond's
curr_active_slave and use it further - so that we're sure the slave won't
go away, and we don't care if it will change in the meanwhile.
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_main.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f9e0c8b..22d8b69 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2605,25 +2605,21 @@ do_failover:
static void bond_ab_arp_probe(struct bonding *bond)
{
struct slave *slave, *before = NULL, *new_slave = NULL,
- *curr_arp_slave = rcu_dereference(bond->current_arp_slave);
+ *curr_arp_slave = rcu_dereference(bond->current_arp_slave),
+ *curr_active_slave = rcu_dereference(bond->curr_active_slave);
struct list_head *iter;
bool found = false;
- read_lock(&bond->curr_slave_lock);
-
- if (curr_arp_slave && bond->curr_active_slave)
+ if (curr_arp_slave && curr_active_slave)
pr_info("PROBE: c_arp %s && cas %s BAD\n",
curr_arp_slave->dev->name,
- bond->curr_active_slave->dev->name);
+ curr_active_slave->dev->name);
- if (bond->curr_active_slave) {
- bond_arp_send_all(bond, bond->curr_active_slave);
- read_unlock(&bond->curr_slave_lock);
+ if (curr_active_slave) {
+ bond_arp_send_all(bond, curr_active_slave);
return;
}
- read_unlock(&bond->curr_slave_lock);
-
/* if we don't have a curr_active_slave, search for the next available
* backup slave from the current_arp_slave and make it the candidate
* for becoming the curr_active_slave
--
1.8.4
^ permalink raw reply related
* [PATCH v2 net-next 2/2] bonding: restructure locking of bond_ab_arp_probe()
From: Veaceslav Falico @ 2014-01-23 13:08 UTC (permalink / raw)
To: netdev; +Cc: Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1390482511-14884-1-git-send-email-vfalico@redhat.com>
Currently we're calling it from under RCU context, however we're using some
functions that require rtnl to be held.
Fix this by restructuring the locking - don't call it under any locks,
aquire rcu_read_lock() if we're sending _only_ (i.e. we have the active
slave present), and use rtnl locking otherwise - if we need to modify
(in)active flags of a slave.
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_main.c | 38 ++++++++++++++++++++++++--------------
1 file changed, 24 insertions(+), 14 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 22d8b69..f879e9e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2605,11 +2605,14 @@ do_failover:
static void bond_ab_arp_probe(struct bonding *bond)
{
struct slave *slave, *before = NULL, *new_slave = NULL,
- *curr_arp_slave = rcu_dereference(bond->current_arp_slave),
- *curr_active_slave = rcu_dereference(bond->curr_active_slave);
+ *curr_arp_slave, *curr_active_slave;
struct list_head *iter;
bool found = false;
+ rcu_read_lock();
+ curr_arp_slave = rcu_dereference(bond->current_arp_slave);
+ curr_active_slave = rcu_dereference(bond->curr_active_slave);
+
if (curr_arp_slave && curr_active_slave)
pr_info("PROBE: c_arp %s && cas %s BAD\n",
curr_arp_slave->dev->name,
@@ -2617,23 +2620,31 @@ static void bond_ab_arp_probe(struct bonding *bond)
if (curr_active_slave) {
bond_arp_send_all(bond, curr_active_slave);
+ rcu_read_unlock();
return;
}
+ rcu_read_unlock();
/* if we don't have a curr_active_slave, search for the next available
* backup slave from the current_arp_slave and make it the candidate
* for becoming the curr_active_slave
*/
+ rtnl_lock();
+ /* curr_arp_slave might have gone away */
+ curr_arp_slave = rcu_dereference(bond->current_arp_slave);
+
if (!curr_arp_slave) {
- curr_arp_slave = bond_first_slave_rcu(bond);
- if (!curr_arp_slave)
+ curr_arp_slave = bond_first_slave(bond);
+ if (!curr_arp_slave) {
+ rtnl_unlock();
return;
+ }
}
bond_set_slave_inactive_flags(curr_arp_slave);
- bond_for_each_slave_rcu(bond, slave, iter) {
+ bond_for_each_slave(bond, slave, iter) {
if (!found && !before && IS_UP(slave->dev))
before = slave;
@@ -2663,21 +2674,24 @@ static void bond_ab_arp_probe(struct bonding *bond)
if (!new_slave && before)
new_slave = before;
- if (!new_slave)
+ if (!new_slave) {
+ rtnl_unlock();
return;
+ }
new_slave->link = BOND_LINK_BACK;
bond_set_slave_active_flags(new_slave);
bond_arp_send_all(bond, new_slave);
new_slave->jiffies = jiffies;
rcu_assign_pointer(bond->current_arp_slave, new_slave);
+ rtnl_unlock();
}
static void bond_activebackup_arp_mon(struct work_struct *work)
{
struct bonding *bond = container_of(work, struct bonding,
arp_work.work);
- bool should_notify_peers = false;
+ bool should_notify_peers = false, should_commit = false;
int delta_in_ticks;
delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
@@ -2686,12 +2700,11 @@ static void bond_activebackup_arp_mon(struct work_struct *work)
goto re_arm;
rcu_read_lock();
-
should_notify_peers = bond_should_notify_peers(bond);
+ should_commit = bond_ab_arp_inspect(bond);
+ rcu_read_unlock();
- if (bond_ab_arp_inspect(bond)) {
- rcu_read_unlock();
-
+ if (should_commit) {
/* Race avoidance with bond_close flush of workqueue */
if (!rtnl_trylock()) {
delta_in_ticks = 1;
@@ -2700,13 +2713,10 @@ static void bond_activebackup_arp_mon(struct work_struct *work)
}
bond_ab_arp_commit(bond);
-
rtnl_unlock();
- rcu_read_lock();
}
bond_ab_arp_probe(bond);
- rcu_read_unlock();
re_arm:
if (bond->params.arp_interval)
--
1.8.4
^ permalink raw reply related
* Re: [PATCH net-next v5 0/9] xen-netback: TX grant mapping with SKBTX_DEV_ZEROCOPY instead of copy
From: Zoltan Kiss @ 2014-01-23 13:13 UTC (permalink / raw)
To: David Miller
Cc: ian.campbell, wei.liu2, xen-devel, netdev, linux-kernel,
jonathan.davies, xen-devel@lists.xenproject.org
In-Reply-To: <20140122.175031.873909526743971037.davem@davemloft.net>
On 23/01/14 01:50, David Miller wrote:
> From: Zoltan Kiss <zoltan.kiss@citrix.com>
> Date: Mon, 20 Jan 2014 21:24:20 +0000
>
>> A long known problem of the upstream netback implementation that on the TX
>> path (from guest to Dom0) it copies the whole packet from guest memory into
>> Dom0. That simply became a bottleneck with 10Gb NICs, and generally it's a
>> huge perfomance penalty. The classic kernel version of netback used grant
>> mapping, and to get notified when the page can be unmapped, it used page
>> destructors. Unfortunately that destructor is not an upstreamable solution.
>> Ian Campbell's skb fragment destructor patch series [1] tried to solve this
>> problem, however it seems to be very invasive on the network stack's code,
>> and therefore haven't progressed very well.
>> This patch series use SKBTX_DEV_ZEROCOPY flags to tell the stack it needs to
>> know when the skb is freed up.
>
> This series does not apply to net-next due to some other recent changes.
>
> Please respin, thanks.
It is already based on two predecessor patches, one which is already
accepted but not applied yet:
[PATCH net-next v2] xen-netback: Rework rx_work_todo
And the other one is hopefully will be accepted very soon:
[PATCH v5] xen/grant-table: Avoid m2p_override during mapping
Zoli
^ permalink raw reply
* [PATCH net-next] Add Shradha Shah as the sfc driver maintainer.
From: Shradha Shah @ 2014-01-23 13:19 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-net-drivers
I will be taking over the work from Ben Hutchings.
Signed-off-by: Shradha Shah <sshah@solarflare.com>
---
MAINTAINERS | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index e11d495..fce0c9c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7631,7 +7631,7 @@ F: drivers/net/ethernet/emulex/benet/
SFC NETWORK DRIVER
M: Solarflare linux maintainers <linux-net-drivers@solarflare.com>
-M: Ben Hutchings <bhutchings@solarflare.com>
+M: Shradha Shah <sshah@solarflare.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/sfc/
^ permalink raw reply related
* Re: [[Xen-devel] [PATCH net-next v3] xen-netfront: clean up code in xennet_release_rx_bufs
From: Annie @ 2014-01-23 13:38 UTC (permalink / raw)
To: David Vrabel
Cc: xen-devel@lists.xen.org, netdev@vger.kernel.org,
konrad.wilk@oracle.com, ian.campbell@citrix.com,
wei.liu2@citrix.com
In-Reply-To: <52E0F528.7060302@citrix.com>
On 01/23/2014 2:55 AM, David Vrabel <david.vrabel@citrix.com> wrote
> On 23/01/14 01:36, Annie Li wrote:
>> From: Annie Li <annie.li@oracle.com>
>>
>> This patch removes grant transfer code from netfront, and improves ending
>> grant acess mechanism since gnttab_end_foreign_access_ref may fail when
>> the grant entry is currently used for reading or writing.
>>
>> * release grant reference and skb for tx/rx path, use get_page/put_page to
>> ensure page is released when grant access is completed successfully.
>> * change corresponding code in xen-blkfront/xen-tpmfront/xen-pcifront because
>> of code change for put_page in gnttab_end_foreign_access.
>> * clean up grant transfer code kept from old netfront(2.6.18) which grants
>> pages for access/map and transfer. But grant transfer is deprecated in current
>> netfront, so remove corresponding release code for transfer.
>>
>> V3: Changes as suggestion from David Vrabel, ensure pages are not freed untill
>> grant acess is ended.
>>
>> V2: improve patch comments.
>> Signed-off-by: Annie Li <annie.li@oracle.com>
>> ---
>> drivers/block/xen-blkfront.c | 25 ++++++++---
>> drivers/char/tpm/xen-tpmfront.c | 7 +++-
>> drivers/net/xen-netfront.c | 93 ++++++++++++--------------------------
>> drivers/pci/xen-pcifront.c | 7 +++-
>> drivers/xen/grant-table.c | 4 +-
>> 5 files changed, 63 insertions(+), 73 deletions(-)
>
> I don't understand why you've made all these unnecessary changes to the
> other frontends and grant-table.c.
>
> The xen-netfront.c changes are fine on their own.
Changes in grant-table.c corresponds to get_page before gnttab_end_foreign_access, just keeping consistent in the function name. This change does not really change the mechanism, so I can revert it.
For the changes in other frontends, the reason is they also have similar issue - grant access probably is not ended successfully when the page is freed.
Thanks
Annie
>
> David
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* RE: [PATCH net-next 3/4] ethtool: Support for configurable RSS hash key.
From: Venkata Duvvuru @ 2014-01-23 13:47 UTC (permalink / raw)
To: Ben Hutchings; +Cc: netdev@vger.kernel.org
In-Reply-To: <1390455540.3651.136.camel@deadeye.wl.decadent.org.uk>
> -----Original Message-----
> From: Ben Hutchings [mailto:ben@decadent.org.uk]
> Sent: Thursday, January 23, 2014 11:09 AM
> To: Venkata Duvvuru
> Cc: netdev@vger.kernel.org
> Subject: Re: [PATCH net-next 3/4] ethtool: Support for configurable RSS hash
> key.
>
> On Wed, 2014-01-22 at 12:12 +0000, Venkata Duvvuru wrote:
> [...]
> > > No, what I mean is:
> > >
> > > 1. An RX flow steering filter can specify use of RSS, in which case
> > > the value looked up in the indirection is added to the queue number
> > > specified in the filter. This is not yet controllable through RX
> > > NFC though there is room for extension there.
> > >
> > > 2. Multi-function controllers need multiple RSS contexts (key +
> > > indirection
> > > table) to support independent use of RSS on each function.
> > > But it may also be possible to allocate multiple contexts to a single
> function.
> > > This could be useful in conjunction with 1. But there would need to
> > > be a way to allocate and configure extra contexts first.
> > The proposed changes will be incremental so I think this can be done
> > in a separate patch. Thoughts?
>
> The ethtool ABI (to userland) has to remain backward-compatible, and it is
> preferable if we don't add lots of different structures for this.
>
> So please define the new command structure to include both the key and
> indirection table, and some reserved space (documented as 'userland must
> set to 0') for future extensions.
I think it’s better to keep key and indirection table settings as different ethtool commands. We can probably add rss contexts (reserved space) to both the command structures.
If we mix key and indirection table into one command structure then it will hamper the compatibility.
Current structure:
struct ethtool_rxfh_indir {
__u32 cmd;
__u32 size;
__u32 ring_index[0];
};
For example,
1. You mentioned about having key as zero byte array, there is already a zero byte array “ring_index”. We can probably have a single zero byte array and allocate both indir + key memory. This may not work as the old ethtool would not allocate memory for key and the newer kernel will try to interpret key which could be relatively junk or invalid addresses.
2. Even if we assume that key will be static array. We have to have the key field before the zero byte array “ring_index”. This also will not be compatible as we are introducing a field in the middle.
>
> Ben.
>
> --
> Ben Hutchings
> compatible: Gracefully accepts erroneous data from any source
^ permalink raw reply
* Re: [Xen-devel] [PATCH v4] xen/grant-table: Avoid m2p_override during mapping
From: Stefano Stabellini @ 2014-01-23 13:59 UTC (permalink / raw)
To: Zoltan Kiss
Cc: Stefano Stabellini, ian.campbell, wei.liu2, xen-devel, netdev,
linux-kernel, jonathan.davies
In-Reply-To: <52E015F5.70408@citrix.com>
On Wed, 22 Jan 2014, Zoltan Kiss wrote:
> > > > > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> > > > > index 2ae8699..0060178 100644
> > > > > --- a/arch/x86/xen/p2m.c
> > > > > +++ b/arch/x86/xen/p2m.c
> > > > > @@ -872,15 +872,13 @@ static unsigned long mfn_hash(unsigned long mfn)
> > > > >
> > > > > /* Add an MFN override for a particular page */
> > > > > int m2p_add_override(unsigned long mfn, struct page *page,
> > > > > - struct gnttab_map_grant_ref *kmap_op)
> > > > > + struct gnttab_map_grant_ref *kmap_op, unsigned long
> > > > > pfn)
> > > >
> > > > Do we really need to add another additional parameter to
> > > > m2p_add_override?
> > > > I would just let m2p_add_override and m2p_remove_override call
> > > > page_to_pfn again. It is not that expensive.
> > > Yes, because that page_to_pfn can return something different. That's why
> > > the
> > > v2 patches failed.
> >
> > I am really curious: how can page_to_pfn return something different?
> > I don't think is supposed to happen.
> You call set_phys_to_machine before calling m2p* functions.
set_phys_to_machine changes the physical to machine mapping, that would
be the mfn corresponding to a given pfn. It shouldn't affect the output
of page_to_pfn that returns the pfn corresponding to a given struct
page. The calculation of which is based on address offsets and should be
static and unaffected by things like set_phys_to_machine.
^ permalink raw reply
* Re: [RFC PATCH net] IPv6: Fix broken IPv6 routing table after loopback down-up
From: Hannes Frederic Sowa @ 2014-01-23 14:11 UTC (permalink / raw)
To: Gao feng; +Cc: Sabrina Dubroca, netdev
In-Reply-To: <52E0B55F.4070501@cn.fujitsu.com>
On Thu, Jan 23, 2014 at 02:23:27PM +0800, Gao feng wrote:
> >
> > -static int fib6_ifdown(struct rt6_info *rt, void *arg)
> > +static int __fib6_match_or_update_if(struct rt6_info *rt, void *arg)
> > {
> > const struct arg_dev_net *adn = arg;
> > const struct net_device *dev = adn->dev;
> >
> > if ((rt->dst.dev == dev || !dev) &&
> > - rt != adn->net->ipv6.ip6_null_entry)
> > - return -1;
> > + rt != adn->net->ipv6.ip6_null_entry) {
> > + switch (adn->action) {
> > + case ARG_DEV_NET_REMOVE:
> > + /* remove rt */
> > + return -1;
> > + case ARG_DEV_NET_DISABLE:
> > + WARN_ON(rt->rt6i_flags & RTF_DEAD);
> > + rt->rt6i_flags |= RTF_DEAD;
> > + return 0;
> > + case ARG_DEV_NET_ENABLE:
> > + WARN_ON(!(rt->rt6i_flags & RTF_DEAD));
>
> I think this may happen, think a new router is cloned from this rt but hasn't been inserted
> into the router tree on other CPU at the same time.
Yes, there are still some problems with this patch. I actually shuffeled
the code around to purge all those RTF_CACHE RTF_DEAD routes yesterday
while holding the same write_lock on the table.
Regarding the problem you addressed, I tried to validate the route
using dst.from on insert while holding write_lock, somehow like we do
for checking expire time. But I really dislike this.
Thanks,
Hannes
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox