* [PATCH] ip6_gre: simplify gre header parsing in ip6gre_err
From: Haishuang Yan @ 2018-09-10 8:25 UTC (permalink / raw)
To: David S. Miller, Alexey Kuznetsov; +Cc: netdev, linux-kernel, Haishuang Yan
Same as ip_gre, use gre_parse_header to parse gre header in gre error
handler code.
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
---
net/ipv6/ip6_gre.c | 29 +++++++----------------------
1 file changed, 7 insertions(+), 22 deletions(-)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 18a3794..505d891 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -427,35 +427,20 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
- const struct gre_base_hdr *greh;
const struct ipv6hdr *ipv6h;
- int grehlen = sizeof(*greh);
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
struct ip6_tnl *t;
- int key_off = 0;
- __be16 flags;
- __be32 key;
- if (!pskb_may_pull(skb, offset + grehlen))
- return;
- greh = (const struct gre_base_hdr *)(skb->data + offset);
- flags = greh->flags;
- if (flags & (GRE_VERSION | GRE_ROUTING))
- return;
- if (flags & GRE_CSUM)
- grehlen += 4;
- if (flags & GRE_KEY) {
- key_off = grehlen + offset;
- grehlen += 4;
+ if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6),
+ offset) < 0) {
+ if (!csum_err) /* ignore csum errors. */
+ return;
}
- if (!pskb_may_pull(skb, offset + grehlen))
- return;
ipv6h = (const struct ipv6hdr *)skb->data;
- greh = (const struct gre_base_hdr *)(skb->data + offset);
- key = key_off ? *(__be32 *)(skb->data + key_off) : 0;
-
t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
- key, greh->protocol);
+ tpi.key, tpi.proto);
if (!t)
return;
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next] virtio_net: ethtool tx napi configuration
From: Willem de Bruijn @ 2018-09-09 22:44 UTC (permalink / raw)
To: netdev; +Cc: davem, caleb.raitto, jasowang, mst, jonolson, Willem de Bruijn
From: Willem de Bruijn <willemb@google.com>
Implement ethtool .set_coalesce (-C) and .get_coalesce (-c) handlers.
Interrupt moderation is currently not supported, so these accept and
display the default settings of 0 usec and 1 frame.
Toggle tx napi through a bit in tx-frames. So as to not interfere
with possible future interrupt moderation, use bit 10, well outside
the reasonable range of real interrupt moderation values.
Changes are not atomic. The tx IRQ, napi BH and transmit path must
be quiesced when switching modes. Only allow changing this setting
when the device is down.
Link: https://patchwork.ozlabs.org/patch/948149/
Suggested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
drivers/net/virtio_net.c | 52 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 765920905226..b320b6b14749 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -66,6 +66,8 @@ DECLARE_EWMA(pkt_len, 0, 64)
#define VIRTNET_DRIVER_VERSION "1.0.0"
+static const u32 ethtool_coalesce_napi_mask = (1UL << 10);
+
static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6,
@@ -2181,6 +2183,54 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
return 0;
}
+static int virtnet_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec)
+{
+ const struct ethtool_coalesce ec_default = {
+ .cmd = ETHTOOL_SCOALESCE,
+ .rx_max_coalesced_frames = 1,
+ .tx_max_coalesced_frames = 1,
+ };
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i, napi_weight = 0;
+
+ if (ec->tx_max_coalesced_frames & ethtool_coalesce_napi_mask) {
+ ec->tx_max_coalesced_frames &= ~ethtool_coalesce_napi_mask;
+ napi_weight = NAPI_POLL_WEIGHT;
+ }
+
+ /* disallow changes to fields not explicitly tested above */
+ if (memcmp(ec, &ec_default, sizeof(ec_default)))
+ return -EINVAL;
+
+ if (napi_weight ^ vi->sq[0].napi.weight) {
+ if (dev->flags & IFF_UP)
+ return -EBUSY;
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ vi->sq[i].napi.weight = napi_weight;
+ }
+
+ return 0;
+}
+
+static int virtnet_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec)
+{
+ const struct ethtool_coalesce ec_default = {
+ .cmd = ETHTOOL_GCOALESCE,
+ .rx_max_coalesced_frames = 1,
+ .tx_max_coalesced_frames = 1,
+ };
+ struct virtnet_info *vi = netdev_priv(dev);
+
+ memcpy(ec, &ec_default, sizeof(ec_default));
+
+ if (vi->sq[0].napi.weight)
+ ec->tx_max_coalesced_frames |= ethtool_coalesce_napi_mask;
+
+ return 0;
+}
+
static void virtnet_init_settings(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
@@ -2219,6 +2269,8 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
.get_ts_info = ethtool_op_get_ts_info,
.get_link_ksettings = virtnet_get_link_ksettings,
.set_link_ksettings = virtnet_set_link_ksettings,
+ .set_coalesce = virtnet_set_coalesce,
+ .get_coalesce = virtnet_get_coalesce,
};
static void virtnet_freeze_down(struct virtio_device *vdev)
--
2.19.0.rc2.392.g5ba43deb5a-goog
^ permalink raw reply related
* Re: [PATCH net-next 13/15] net: Add and use skb_list_del_init().
From: David Miller @ 2018-09-10 0:35 UTC (permalink / raw)
To: sergei.shtylyov; +Cc: netdev
In-Reply-To: <5384880e-2f2c-5067-52db-a31727dd1b10@cogentembedded.com>
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sun, 9 Sep 2018 12:45:50 +0300
> Hello!
>
> On 9/8/2018 11:11 PM, David Miller wrote:
>
>> It documents what is happening, and eliminates the spurious list
>> pointer poisoning.
>> In the long term, in order to get proper list head debugging, we
>> might want to use the list poinson value as the indicator that
>
> Poison?
I knew you would find something Sergei :-)
I'll fix that up, thanks.
^ permalink raw reply
* Re: [PATCH net-next] virtio_net: force_napi_tx module param.
From: Willem de Bruijn @ 2018-09-09 23:07 UTC (permalink / raw)
To: Jason Wang
Cc: Jon Olson (Google Drive), Michael S. Tsirkin, caleb.raitto,
David Miller, Network Development, Caleb Raitto
In-Reply-To: <CAF=yD-L+psndKfTFS0S33ZvO7NP4wFO=-Gr5aYw22_oqkx+smA@mail.gmail.com>
On Wed, Aug 29, 2018 at 9:01 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Wed, Aug 29, 2018 at 3:56 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> >
> >
> > On 2018年08月29日 03:57, Willem de Bruijn wrote:
> > > On Mon, Jul 30, 2018 at 2:06 AM Jason Wang <jasowang@redhat.com> wrote:
> > >>
> > >>
> > >> On 2018年07月25日 08:17, Jon Olson wrote:
> > >>> On Tue, Jul 24, 2018 at 3:46 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >>>> On Tue, Jul 24, 2018 at 06:31:54PM -0400, Willem de Bruijn wrote:
> > >>>>> On Tue, Jul 24, 2018 at 6:23 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >>>>>> On Tue, Jul 24, 2018 at 04:52:53PM -0400, Willem de Bruijn wrote:
> > >>>>>>> >From the above linked patch, I understand that there are yet
> > >>>>>>> other special cases in production, such as a hard cap on #tx queues to
> > >>>>>>> 32 regardless of number of vcpus.
> > >>>>>> I don't think upstream kernels have this limit - we can
> > >>>>>> now use vmalloc for higher number of queues.
> > >>>>> Yes. that patch* mentioned it as a google compute engine imposed
> > >>>>> limit. It is exactly such cloud provider imposed rules that I'm
> > >>>>> concerned about working around in upstream drivers.
> > >>>>>
> > >>>>> * for reference, I mean https://patchwork.ozlabs.org/patch/725249/
> > >>>> Yea. Why does GCE do it btw?
> > >>> There are a few reasons for the limit, some historical, some current.
> > >>>
> > >>> Historically we did this because of a kernel limit on the number of
> > >>> TAP queues (in Montreal I thought this limit was 32). To my chagrin,
> > >>> the limit upstream at the time we did it was actually eight. We had
> > >>> increased the limit from eight to 32 internally, and it appears in
> > >>> upstream it has subsequently increased upstream to 256. We no longer
> > >>> use TAP for networking, so that constraint no longer applies for us,
> > >>> but when looking at removing/raising the limit we discovered no
> > >>> workloads that clearly benefited from lifting it, and it also placed
> > >>> more pressure on our virtual networking stack particularly on the Tx
> > >>> side. We left it as-is.
> > >>>
> > >>> In terms of current reasons there are really two. One is memory usage.
> > >>> As you know, virtio-net uses rx/tx pairs, so there's an expectation
> > >>> that the guest will have an Rx queue for every Tx queue. We run our
> > >>> individual virtqueues fairly deep (4096 entries) to give guests a wide
> > >>> time window for re-posting Rx buffers and avoiding starvation on
> > >>> packet delivery. Filling an Rx vring with max-sized mergeable buffers
> > >>> (4096 bytes) is 16MB of GFP_ATOMIC allocations. At 32 queues this can
> > >>> be up to 512MB of memory posted for network buffers. Scaling this to
> > >>> the largest VM GCE offers today (160 VCPUs -- n1-ultramem-160) keeping
> > >>> all of the Rx rings full would (in the large average Rx packet size
> > >>> case) consume up to 2.5 GB(!) of guest RAM. Now, those VMs have 3.8T
> > >>> of RAM available, but I don't believe we've observed a situation where
> > >>> they would have benefited from having 2.5 gigs of buffers posted for
> > >>> incoming network traffic :)
> > >> We can work to have async txq and rxq instead of paris if there's a
> > >> strong requirement.
> > >>
> > >>> The second reason is interrupt related -- as I mentioned above, we
> > >>> have found no workloads that clearly benefit from so many queues, but
> > >>> we have found workloads that degrade. In particular workloads that do
> > >>> a lot of small packet processing but which aren't extremely latency
> > >>> sensitive can achieve higher PPS by taking fewer interrupt across
> > >>> fewer VCPUs due to better batching (this also incurs higher latency,
> > >>> but at the limit the "busy" cores end up suppressing most interrupts
> > >>> and spending most of their cycles farming out work). Memcache is a
> > >>> good example here, particularly if the latency targets for request
> > >>> completion are in the ~milliseconds range (rather than the
> > >>> microseconds we typically strive for with TCP_RR-style workloads).
> > >>>
> > >>> All of that said, we haven't been forthcoming with data (and
> > >>> unfortunately I don't have it handy in a useful form, otherwise I'd
> > >>> simply post it here), so I understand the hesitation to simply run
> > >>> with napi_tx across the board. As Willem said, this patch seemed like
> > >>> the least disruptive way to allow us to continue down the road of
> > >>> "universal" NAPI Tx and to hopefully get data across enough workloads
> > >>> (with VMs small, large, and absurdly large :) to present a compelling
> > >>> argument in one direction or another. As far as I know there aren't
> > >>> currently any NAPI related ethtool commands (based on a quick perusal
> > >>> of ethtool.h)
> > >> As I suggest before, maybe we can (ab)use tx-frames-irq.
> > > I forgot to respond to this originally, but I agree.
> > >
> > > How about something like the snippet below. It would be simpler to
> > > reason about if only allow switching while the device is down, but
> > > napi does not strictly require that.
> > >
> > > +static int virtnet_set_coalesce(struct net_device *dev,
> > > + struct ethtool_coalesce *ec)
> > > +{
> > > + const u32 tx_coalesce_napi_mask = (1 << 16);
> > > + const struct ethtool_coalesce ec_default = {
> > > + .cmd = ETHTOOL_SCOALESCE,
> > > + .rx_max_coalesced_frames = 1,
> > > + .tx_max_coalesced_frames = 1,
> > > + };
> > > + struct virtnet_info *vi = netdev_priv(dev);
> > > + int napi_weight = 0;
> > > + bool running;
> > > + int i;
> > > +
> > > + if (ec->tx_max_coalesced_frames & tx_coalesce_napi_mask) {
> > > + ec->tx_max_coalesced_frames &= ~tx_coalesce_napi_mask;
> > > + napi_weight = NAPI_POLL_WEIGHT;
> > > + }
> > > +
> > > + /* disallow changes to fields not explicitly tested above */
> > > + if (memcmp(ec, &ec_default, sizeof(ec_default)))
> > > + return -EINVAL;
> > > +
> > > + if (napi_weight ^ vi->sq[0].napi.weight) {
> > > + running = netif_running(vi->dev);
> > > +
> > > + for (i = 0; i < vi->max_queue_pairs; i++) {
> > > + vi->sq[i].napi.weight = napi_weight;
> > > +
> > > + if (!running)
> > > + continue;
> > > +
> > > + if (napi_weight)
> > > + virtnet_napi_tx_enable(vi, vi->sq[i].vq,
> > > + &vi->sq[i].napi);
> > > + else
> > > + napi_disable(&vi->sq[i].napi);
> > > + }
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int virtnet_get_coalesce(struct net_device *dev,
> > > + struct ethtool_coalesce *ec)
> > > +{
> > > + const u32 tx_coalesce_napi_mask = (1 << 16);
> > > + const struct ethtool_coalesce ec_default = {
> > > + .cmd = ETHTOOL_GCOALESCE,
> > > + .rx_max_coalesced_frames = 1,
> > > + .tx_max_coalesced_frames = 1,
> > > + };
> > > + struct virtnet_info *vi = netdev_priv(dev);
> > > +
> > > + memcpy(ec, &ec_default, sizeof(ec_default));
> > > +
> > > + if (vi->sq[0].napi.weight)
> > > + ec->tx_max_coalesced_frames |= tx_coalesce_napi_mask;
> > > +
> > > + return 0;
> > > +}
> >
> > Looks good. Just one nit, maybe it's better simply check against zero?
>
> I wanted to avoid making napi and interrupt moderation mutually
> exclusive. If the virtio-net driver ever gets true moderation support,
> it should be able to work alongside napi.
>
> But I can make no-napi be 0 and napi be 1. That is future proof, in
> the sense that napi is enabled if there is any interrupt moderation.
It's not appearing on patchwork yet, but I just sent a patch.
I implemented the above, but .tx_frames of 0 is technically incorrect
and it would unnecessarily constrain interrupt moderation to one of two
modes. I went back to using a high bit. That said, if you feel strongly
I'll change it.
I also tried various ways of switching between napi and non napi
mode without bringing the device down. This is quite fragile. At the
very least napi.weight has to be updated without any interrupt or
napi callback happening in between. So most of the datapath needs
to be quiesced.
I did code up a variant that manually stops all the queues, masks the
interrupt and waits for napi to complete if scheduled. But in a stress
test it still managed to trigger a BUG in napi_enable on this state.
Napi is not switched at runtime in other devices, nor really needed. So
instead I made this change conditional on the device being down.
^ permalink raw reply
* Re: [PATCH] ath9k: add reset for airtime station debugfs
From: Dave Taht @ 2018-09-09 23:44 UTC (permalink / raw)
To: Toke Høiland-Jørgensen
Cc: git, Kalle Valo, QCA ath9k Development, David S. Miller,
linux-wireless, Linux Kernel Network Developers
In-Reply-To: <87sh2mykmt.fsf@toke.dk>
On Thu, Sep 6, 2018 at 4:13 AM Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>
> Louie Lu <git@louie.lu> writes:
>
> > Toke Høiland-Jørgensen <toke@toke.dk> 於 2018年9月6日 週四 下午5:27寫道:
> >
> >> Louie Lu <git@louie.lu> writes:
> >>
> >> > Let user can reset station airtime status by debugfs, it will
> >> > reset all airtime deficit to ATH_AIRTIME_QUANTUM and reset rx/tx
> >> > airtime accumulate to 0.
> >>
> >> No objections to the patch, but I'm curious which issues you were
> >> debugging that led you to needing it? :)
> >>
> > I'm testing to get the packet queue time + airtime in
> > ath_tx_process_buffer,
>
> Right; I've been thinking that it would be useful to make the CoDel
> enqueue time available to drivers. And minstrel, for that matter
> (lowering the number of retries for packets that has queued for a long
> time, for instance). Good to hear that others are looking into something
> similar :)
Yea! Seeing retransmits scale down would be a goodness. Last I looked
ath9k was at, like 10?, when it should be, like, 2, at mcs0 and 10 at
mcs15.
I can't seem to publish a link to this directly, but it's open access
if you search via https://scholar.google.com/:
"Resolving Bufferbloat in TCP Communication over IEEE 802.11 n WLAN by
Reducing MAC Retransmission Limit at Low Data Rate"
even their simple bifurcated model worked well.
>
> > it would be useful if I can reset the station airtime accumulated
> > value, so I can observe in each test round (e.g. 5 ping) airtime
> > accumulated
> >
> > Also to reset the deficit to make sure it run like fresh one.
>
> Yup, makes sense.
>
> -Toke
--
Dave Täht
CEO, TekLibre, LLC
http://www.teklibre.com
Tel: 1-669-226-2619
^ permalink raw reply
* Re: [PATCH] netlink: fix hash::nelems check
From: Li RongQing @ 2018-09-10 1:36 UTC (permalink / raw)
To: Li RongQing; +Cc: netdev
In-Reply-To: <1536417194-20828-1-git-send-email-lirongqing@baidu.com>
after reconsider, I think we can remove this check directly, since
rht_grow_above_max() will be called to check the overflow again in
rhashtable_insert_one.
and atomic_read(&table->hash.nelems) always compares with unsigned
value, will force to switch unsigned, so the hash.nelems overflows can
be accepted.
-Rong
^ permalink raw reply
* [PATCH net-next] tcp: rate limit synflood warnings further
From: Willem de Bruijn @ 2018-09-09 23:12 UTC (permalink / raw)
To: netdev; +Cc: davem, eric.dumazet, Willem de Bruijn
From: Willem de Bruijn <willemb@google.com>
Convert pr_info to net_info_ratelimited to limit the total number of
synflood warnings.
Commit 946cedccbd73 ("tcp: Change possible SYN flooding messages")
rate limits synflood warnings to one per listener.
Workloads that open many listener sockets can still see a high rate of
log messages. Syzkaller is one frequent example.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
net/ipv4/tcp_input.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 62508a2f9b21..d9034073138c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6380,8 +6380,8 @@ static bool tcp_syn_flood_action(const struct sock *sk,
if (!queue->synflood_warned &&
net->ipv4.sysctl_tcp_syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
- pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
- proto, ntohs(tcp_hdr(skb)->dest), msg);
+ net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
+ proto, ntohs(tcp_hdr(skb)->dest), msg);
return want_cookie;
}
--
2.19.0.rc2.392.g5ba43deb5a-goog
^ permalink raw reply related
* Re: [PATCH] wireless: remove unnecessary condition check before kfree
From: Johannes Berg @ 2018-09-10 7:10 UTC (permalink / raw)
To: zhong jiang, davem; +Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <1536415955-33776-1-git-send-email-zhongjiang@huawei.com>
On Sat, 2018-09-08 at 22:12 +0800, zhong jiang wrote:
> kfree has taken the null pointer into account. Just remove the
> redundant condition check before kfree.
I'm all for doing that if it actually removes conditionals, but
> - if (!IS_ERR_OR_NULL(regdb))
> + if (!IS_ERR(regdb))
> kfree(regdb);
this seems rather pointless since there's still a condition. In that
case, I feel it's easier to understand the original code.
johannes
^ permalink raw reply
* [PATCH] r8169: Clear RTL_FLAG_TASK_*_PENDING when clearing RTL_FLAG_TASK_ENABLED
From: Kai-Heng Feng @ 2018-09-10 6:34 UTC (permalink / raw)
To: nic_swsd; +Cc: davem, netdev, linux-kernel, Kai-Heng Feng, Heiner Kallweit
After system suspend, sometimes the r8169 doesn't work when ethernet
cable gets pluggued.
This issue happens because rtl_reset_work() doesn't get called from
rtl8169_runtime_resume(), after system suspend.
In rtl_task(), RTL_FLAG_TASK_* only gets cleared if this condition is
met:
if (!netif_running(dev) ||
!test_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags))
...
If RTL_FLAG_TASK_ENABLED was cleared during system suspend while
RTL_FLAG_TASK_RESET_PENDING was set, the next rtl_schedule_task() won't
schedule task as the flag is still there.
So in addition to clearing RTL_FLAG_TASK_ENABLED, also clears other
flags.
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
---
drivers/net/ethernet/realtek/r8169.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b08d51bf7a20..20593245ef53 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -6648,6 +6648,7 @@ static int rtl8169_close(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
struct pci_dev *pdev = tp->pci_dev;
+ int i;
pm_runtime_get_sync(&pdev->dev);
@@ -6655,7 +6656,9 @@ static int rtl8169_close(struct net_device *dev)
rtl8169_update_counters(tp);
rtl_lock_work(tp);
- clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
+ /* Clear all task flags */
+ for (i = 0; i < RTL_FLAG_MAX; i++)
+ clear_bit(i, tp->wk.flags);
rtl8169_down(dev);
rtl_unlock_work(tp);
@@ -6828,6 +6831,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
static void rtl8169_net_suspend(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
+ int i;
if (!netif_running(dev))
return;
@@ -6838,7 +6842,10 @@ static void rtl8169_net_suspend(struct net_device *dev)
rtl_lock_work(tp);
napi_disable(&tp->napi);
- clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
+ /* Clear all task flags */
+ for (i = 0; i < RTL_FLAG_MAX; i++)
+ clear_bit(i, tp->wk.flags);
+
rtl_unlock_work(tp);
rtl_pll_power_down(tp);
--
2.17.1
^ permalink raw reply related
* Re: [PATCH 0/5] introduce setbits32/clrbits32/clrsetbits32/setbits64/clrbits64/clrsetbits64 functions
From: Christophe LEROY @ 2018-09-10 5:24 UTC (permalink / raw)
To: Corentin Labbe, Gilles.Muller, Julia.Lawall, agust,
alexandre.torgue, alistair, benh, carlo, davem, galak, joabreu,
khilman, maxime.ripard, michal.lkml, mpe, mporter, nicolas.palix,
oss, paulus, peppe.cavallaro, tj, vitb, wens
Cc: netdev, linux-kernel, linux-ide, linux-sunxi, linux-amlogic,
linuxppc-dev, cocci, linux-arm-kernel
In-Reply-To: <1536349307-20714-1-git-send-email-clabbe@baylibre.com>
Le 07/09/2018 à 21:41, Corentin Labbe a écrit :
> Hello
>
> This patchset adds a new set of functions which are open-coded in lot of
> place.
> Basicly the pattern is always the same, "read, modify a bit, write"
> some driver already have thoses pattern them as functions. (like ahci_sunxi.c or dwmac-meson8b)
>
> The first patch rename some powerpc funtions which already use the same name (xxxbits32)
> but with only bigendian values.
The same name as what ?
>
> The second patch adds the header.
But the second patch adds functions with the same name as the powerpc
ones but doing something different. Why consider that setbits32() should
be LE and not BE ?
Christophe
> The third patch is an ugly try to implement a coccinelle semantic patch to
> find all place where xxxbits function could be used.
> Probably this spatch could be better written and I didnt found an easy way to add the "linux/setbits" header.
>
> The two last patch are example of convertion of two drivers.
> Thoses patchs give an example of the reduction of code won by using xxxbits32.
>
> This patchset is tested with the ahci_sunxi and dwmac-sun8i drivers.
>
> Regards
>
> Corentin Labbe (5):
> powerpc: rename setbits32/clrbits32 to setbits32_be/clrbits32_be
> include: add
> setbits32/clrbits32/clrsetbits32/setbits64/clrbits64/clrsetbits64 in
> linux/setbits.h
> coccinelle: add xxxsetbitsXX converting spatch
> net: ethernet: stmmac: use xxxsetbits32
> ata: ahci_sunxi: use xxxsetbits32 functions
>
> arch/powerpc/include/asm/fsl_lbc.h | 2 +-
> arch/powerpc/include/asm/io.h | 5 +-
> arch/powerpc/platforms/44x/canyonlands.c | 4 +-
> arch/powerpc/platforms/4xx/gpio.c | 28 +-
> arch/powerpc/platforms/512x/pdm360ng.c | 6 +-
> arch/powerpc/platforms/52xx/mpc52xx_common.c | 6 +-
> arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 10 +-
> arch/powerpc/platforms/82xx/ep8248e.c | 2 +-
> arch/powerpc/platforms/82xx/km82xx.c | 6 +-
> arch/powerpc/platforms/82xx/mpc8272_ads.c | 10 +-
> arch/powerpc/platforms/82xx/pq2.c | 2 +-
> arch/powerpc/platforms/82xx/pq2ads-pci-pic.c | 4 +-
> arch/powerpc/platforms/82xx/pq2fads.c | 10 +-
> arch/powerpc/platforms/83xx/km83xx.c | 6 +-
> arch/powerpc/platforms/83xx/mpc836x_mds.c | 2 +-
> arch/powerpc/platforms/85xx/mpc85xx_mds.c | 2 +-
> arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 4 +-
> arch/powerpc/platforms/85xx/mpc85xx_rdb.c | 2 +-
> arch/powerpc/platforms/85xx/p1022_ds.c | 4 +-
> arch/powerpc/platforms/85xx/p1022_rdk.c | 4 +-
> arch/powerpc/platforms/85xx/t1042rdb_diu.c | 4 +-
> arch/powerpc/platforms/85xx/twr_p102x.c | 2 +-
> arch/powerpc/platforms/86xx/mpc8610_hpcd.c | 4 +-
> arch/powerpc/platforms/8xx/adder875.c | 2 +-
> arch/powerpc/platforms/8xx/m8xx_setup.c | 4 +-
> arch/powerpc/platforms/8xx/mpc86xads_setup.c | 4 +-
> arch/powerpc/platforms/8xx/mpc885ads_setup.c | 28 +-
> arch/powerpc/platforms/embedded6xx/flipper-pic.c | 6 +-
> arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 8 +-
> arch/powerpc/platforms/embedded6xx/wii.c | 10 +-
> arch/powerpc/sysdev/cpm1.c | 26 +-
> arch/powerpc/sysdev/cpm2.c | 16 +-
> arch/powerpc/sysdev/cpm_common.c | 4 +-
> arch/powerpc/sysdev/fsl_85xx_l2ctlr.c | 8 +-
> arch/powerpc/sysdev/fsl_lbc.c | 2 +-
> arch/powerpc/sysdev/fsl_pci.c | 8 +-
> arch/powerpc/sysdev/fsl_pmc.c | 2 +-
> arch/powerpc/sysdev/fsl_rcpm.c | 74 ++--
> arch/powerpc/sysdev/fsl_rio.c | 4 +-
> arch/powerpc/sysdev/fsl_rmu.c | 8 +-
> arch/powerpc/sysdev/mpic_timer.c | 12 +-
> drivers/ata/ahci_sunxi.c | 51 +--
> .../net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 54 +--
> drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 55 +--
> .../net/ethernet/stmicro/stmmac/dwmac1000_core.c | 21 +-
> drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 51 +--
> drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c | 13 +-
> drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 42 +-
> drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 11 +-
> drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 17 +-
> .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 30 +-
> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 69 +---
> .../net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 11 +-
> drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 7 +-
> include/linux/setbits.h | 55 +++
> scripts/coccinelle/misc/setbits.cocci | 423 +++++++++++++++++++++
> 56 files changed, 776 insertions(+), 489 deletions(-)
> create mode 100644 include/linux/setbits.h
> create mode 100644 scripts/coccinelle/misc/setbits.cocci
>
^ permalink raw reply
* Re: [PATCH 2/5] include: add setbits32/clrbits32/clrsetbits32/setbits64/clrbits64/clrsetbits64 in linux/setbits.h
From: Christophe LEROY @ 2018-09-10 5:22 UTC (permalink / raw)
To: Corentin Labbe, Gilles.Muller, Julia.Lawall, agust,
alexandre.torgue, alistair, benh, carlo, davem, galak, joabreu,
khilman, maxime.ripard, michal.lkml, mpe, mporter, nicolas.palix,
oss, paulus, peppe.cavallaro, tj, vitb, wens
Cc: netdev, linux-kernel, linux-ide, linux-sunxi, linux-amlogic,
linuxppc-dev, cocci, linux-arm-kernel
In-Reply-To: <1536349307-20714-3-git-send-email-clabbe@baylibre.com>
Le 07/09/2018 à 21:41, Corentin Labbe a écrit :
> This patch adds setbits32/clrbits32/clrsetbits32 and
> setbits64/clrbits64/clrsetbits64 in linux/setbits.h header.
So you changed the name of setbits32() ... to setbits32_be() and now you
are adding new functions called setbits32() ... which do something
different ?
What will happen if any file has been forgotten during the conversion,
or if anybody has outoftree drivers and missed this change ?
They will silently successfully compile without any error or warning,
and the result will be crap buggy.
And why would it be more legitim to have setbits32() be implicitely LE
instead of implicitely BE ?
I really think those new functions should be called something like
setbits_le32() ...
Christophe
>
> Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
> ---
> include/linux/setbits.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 55 insertions(+)
> create mode 100644 include/linux/setbits.h
>
> diff --git a/include/linux/setbits.h b/include/linux/setbits.h
> new file mode 100644
> index 000000000000..3e1e273551bb
> --- /dev/null
> +++ b/include/linux/setbits.h
> @@ -0,0 +1,55 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __LINUX_SETBITS_H
> +#define __LINUX_SETBITS_H
> +
> +#include <linux/io.h>
> +
> +#define __setbits(readfunction, writefunction, addr, set) \
> + writefunction((readfunction(addr) | (set)), addr)
> +#define __clrbits(readfunction, writefunction, addr, mask) \
> + writefunction((readfunction(addr) & ~(mask)), addr)
> +#define __clrsetbits(readfunction, writefunction, addr, mask, set) \
> + writefunction(((readfunction(addr) & ~(mask)) | (set)), addr)
> +#define __setclrbits(readfunction, writefunction, addr, mask, set) \
> + writefunction(((readfunction(addr) | (seti)) & ~(mask)), addr)
> +
> +#define setbits32(addr, set) __setbits(readl, writel, addr, set)
> +#define setbits32_relaxed(addr, set) __setbits(readl_relaxed, writel_relaxed, \
> + addr, set)
> +
> +#define clrbits32(addr, mask) __clrbits(readl, writel, addr, mask)
> +#define clrbits32_relaxed(addr, mask) __clrbits(readl_relaxed, writel_relaxed, \
> + addr, mask)
> +
> +#define clrsetbits32(addr, mask, set) __clrsetbits(readl, writel, addr, mask, set)
> +#define clrsetbits32_relaxed(addr, mask, set) __clrsetbits(readl_relaxed, \
> + writel_relaxed, \
> + addr, mask, set)
> +
> +#define setclrbits32(addr, mask, set) __setclrbits(readl, writel, addr, mask, set)
> +#define setclrbits32_relaxed(addr, mask, set) __setclrbits(readl_relaxed, \
> + writel_relaxed, \
> + addr, mask, set)
> +
> +/* We cannot use CONFIG_64BIT as some x86 drivers use writeq */
> +#if defined(writeq) && defined(readq)
> +#define setbits64(addr, set) __setbits(readq, writeq, addr, set)
> +#define setbits64_relaxed(addr, set) __setbits(readq_relaxed, writeq_relaxed, \
> + addr, set)
> +
> +#define clrbits64(addr, mask) __clrbits(readq, writeq, addr, mask)
> +#define clrbits64_relaxed(addr, mask) __clrbits(readq_relaxed, writeq_relaxed, \
> + addr, mask)
> +
> +#define clrsetbits64(addr, mask, set) __clrsetbits(readq, writeq, addr, mask, set)
> +#define clrsetbits64_relaxed(addr, mask, set) __clrsetbits(readq_relaxed, \
> + writeq_relaxed, \
> + addr, mask, set)
> +
> +#define setclrbits64(addr, mask, set) __setclrbits(readq, writeq, addr, mask, set)
> +#define setclrbits64_relaxed(addr, mask, set) __setclrbits(readq_relaxed, \
> + writeq_relaxed, \
> + addr, mask, set)
> +#endif /* writeq/readq */
> +
> +#endif /* __LINUX_SETBITS_H */
>
^ permalink raw reply
* Re: [PATCH 1/5] powerpc: rename setbits32/clrbits32 to setbits32_be/clrbits32_be
From: Christophe LEROY @ 2018-09-10 5:16 UTC (permalink / raw)
To: Corentin Labbe, Gilles.Muller, Julia.Lawall, agust,
alexandre.torgue, alistair, benh, carlo, davem, galak, joabreu,
khilman, maxime.ripard, michal.lkml, mpe, mporter, nicolas.palix,
oss, paulus, peppe.cavallaro, tj, vitb, wens
Cc: netdev, linux-kernel, linux-ide, linux-sunxi, linux-amlogic,
linuxppc-dev, cocci, linux-arm-kernel
In-Reply-To: <1536349307-20714-2-git-send-email-clabbe@baylibre.com>
Le 07/09/2018 à 21:41, Corentin Labbe a écrit :
> Since setbits32/clrbits32 work on be32, it's better to remove ambiguity on
> the used data type.
Wouldn't it be better to call them setbits_be32() / clrbits_be32() to
have something looking similar to in_be32() / ou_be32() ?
Christophe
>
> Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
> ---
> arch/powerpc/include/asm/fsl_lbc.h | 2 +-
> arch/powerpc/include/asm/io.h | 5 +-
> arch/powerpc/platforms/44x/canyonlands.c | 4 +-
> arch/powerpc/platforms/4xx/gpio.c | 28 ++++-----
> arch/powerpc/platforms/512x/pdm360ng.c | 6 +-
> arch/powerpc/platforms/52xx/mpc52xx_common.c | 6 +-
> arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 10 ++--
> arch/powerpc/platforms/82xx/ep8248e.c | 2 +-
> arch/powerpc/platforms/82xx/km82xx.c | 6 +-
> arch/powerpc/platforms/82xx/mpc8272_ads.c | 10 ++--
> arch/powerpc/platforms/82xx/pq2.c | 2 +-
> arch/powerpc/platforms/82xx/pq2ads-pci-pic.c | 4 +-
> arch/powerpc/platforms/82xx/pq2fads.c | 10 ++--
> arch/powerpc/platforms/83xx/km83xx.c | 6 +-
> arch/powerpc/platforms/83xx/mpc836x_mds.c | 2 +-
> arch/powerpc/platforms/85xx/mpc85xx_mds.c | 2 +-
> arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 4 +-
> arch/powerpc/platforms/85xx/mpc85xx_rdb.c | 2 +-
> arch/powerpc/platforms/85xx/p1022_ds.c | 4 +-
> arch/powerpc/platforms/85xx/p1022_rdk.c | 4 +-
> arch/powerpc/platforms/85xx/t1042rdb_diu.c | 4 +-
> arch/powerpc/platforms/85xx/twr_p102x.c | 2 +-
> arch/powerpc/platforms/86xx/mpc8610_hpcd.c | 4 +-
> arch/powerpc/platforms/8xx/adder875.c | 2 +-
> arch/powerpc/platforms/8xx/m8xx_setup.c | 4 +-
> arch/powerpc/platforms/8xx/mpc86xads_setup.c | 4 +-
> arch/powerpc/platforms/8xx/mpc885ads_setup.c | 28 ++++-----
> arch/powerpc/platforms/embedded6xx/flipper-pic.c | 6 +-
> arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 8 +--
> arch/powerpc/platforms/embedded6xx/wii.c | 10 ++--
> arch/powerpc/sysdev/cpm1.c | 26 ++++-----
> arch/powerpc/sysdev/cpm2.c | 16 ++---
> arch/powerpc/sysdev/cpm_common.c | 4 +-
> arch/powerpc/sysdev/fsl_85xx_l2ctlr.c | 8 +--
> arch/powerpc/sysdev/fsl_lbc.c | 2 +-
> arch/powerpc/sysdev/fsl_pci.c | 8 +--
> arch/powerpc/sysdev/fsl_pmc.c | 2 +-
> arch/powerpc/sysdev/fsl_rcpm.c | 74 ++++++++++++------------
> arch/powerpc/sysdev/fsl_rio.c | 4 +-
> arch/powerpc/sysdev/fsl_rmu.c | 8 +--
> arch/powerpc/sysdev/mpic_timer.c | 12 ++--
> 41 files changed, 178 insertions(+), 177 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/fsl_lbc.h b/arch/powerpc/include/asm/fsl_lbc.h
> index c7240a024b96..55d7aa0c27cf 100644
> --- a/arch/powerpc/include/asm/fsl_lbc.h
> +++ b/arch/powerpc/include/asm/fsl_lbc.h
> @@ -276,7 +276,7 @@ static inline void fsl_upm_start_pattern(struct fsl_upm *upm, u8 pat_offset)
> */
> static inline void fsl_upm_end_pattern(struct fsl_upm *upm)
> {
> - clrbits32(upm->mxmr, MxMR_OP_RP);
> + clrbits32_be(upm->mxmr, MxMR_OP_RP);
>
> while (in_be32(upm->mxmr) & MxMR_OP_RP)
> cpu_relax();
> diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
> index e0331e754568..29ecefd41ecb 100644
> --- a/arch/powerpc/include/asm/io.h
> +++ b/arch/powerpc/include/asm/io.h
> @@ -873,8 +873,8 @@ static inline void * bus_to_virt(unsigned long address)
> #endif /* CONFIG_PPC32 */
>
> /* access ports */
> -#define setbits32(_addr, _v) out_be32((_addr), in_be32(_addr) | (_v))
> -#define clrbits32(_addr, _v) out_be32((_addr), in_be32(_addr) & ~(_v))
> +#define setbits32_be(_addr, _v) out_be32((_addr), in_be32(_addr) | (_v))
> +#define clrbits32_be(_addr, _v) out_be32((_addr), in_be32(_addr) & ~(_v))
>
> #define setbits16(_addr, _v) out_be16((_addr), in_be16(_addr) | (_v))
> #define clrbits16(_addr, _v) out_be16((_addr), in_be16(_addr) & ~(_v))
> @@ -904,6 +904,7 @@ static inline void * bus_to_virt(unsigned long address)
> #define clrsetbits_le16(addr, clear, set) clrsetbits(le16, addr, clear, set)
>
> #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set)
> +#define clrsetbits32_be(addr, clear, set) clrsetbits(be32, addr, clear, set)
>
> #endif /* __KERNEL__ */
>
> diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c
> index 157f4ce46386..7145a730769d 100644
> --- a/arch/powerpc/platforms/44x/canyonlands.c
> +++ b/arch/powerpc/platforms/44x/canyonlands.c
> @@ -113,8 +113,8 @@ static int __init ppc460ex_canyonlands_fixup(void)
> * USB2HStop and gpio19 will be USB2DStop. For more details refer to
> * table 34-7 of PPC460EX user manual.
> */
> - setbits32((vaddr + GPIO0_OSRH), 0x42000000);
> - setbits32((vaddr + GPIO0_TSRH), 0x42000000);
> + setbits32_be((vaddr + GPIO0_OSRH), 0x42000000);
> + setbits32_be((vaddr + GPIO0_TSRH), 0x42000000);
> err_gpio:
> iounmap(vaddr);
> err_bcsr:
> diff --git a/arch/powerpc/platforms/4xx/gpio.c b/arch/powerpc/platforms/4xx/gpio.c
> index 2238e369cde4..e84f2d20674e 100644
> --- a/arch/powerpc/platforms/4xx/gpio.c
> +++ b/arch/powerpc/platforms/4xx/gpio.c
> @@ -82,9 +82,9 @@ __ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
> struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
>
> if (val)
> - setbits32(®s->or, GPIO_MASK(gpio));
> + setbits32_be(®s->or, GPIO_MASK(gpio));
> else
> - clrbits32(®s->or, GPIO_MASK(gpio));
> + clrbits32_be(®s->or, GPIO_MASK(gpio));
> }
>
> static void
> @@ -112,18 +112,18 @@ static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
> spin_lock_irqsave(&chip->lock, flags);
>
> /* Disable open-drain function */
> - clrbits32(®s->odr, GPIO_MASK(gpio));
> + clrbits32_be(®s->odr, GPIO_MASK(gpio));
>
> /* Float the pin */
> - clrbits32(®s->tcr, GPIO_MASK(gpio));
> + clrbits32_be(®s->tcr, GPIO_MASK(gpio));
>
> /* Bits 0-15 use TSRL/OSRL, bits 16-31 use TSRH/OSRH */
> if (gpio < 16) {
> - clrbits32(®s->osrl, GPIO_MASK2(gpio));
> - clrbits32(®s->tsrl, GPIO_MASK2(gpio));
> + clrbits32_be(®s->osrl, GPIO_MASK2(gpio));
> + clrbits32_be(®s->tsrl, GPIO_MASK2(gpio));
> } else {
> - clrbits32(®s->osrh, GPIO_MASK2(gpio));
> - clrbits32(®s->tsrh, GPIO_MASK2(gpio));
> + clrbits32_be(®s->osrh, GPIO_MASK2(gpio));
> + clrbits32_be(®s->tsrh, GPIO_MASK2(gpio));
> }
>
> spin_unlock_irqrestore(&chip->lock, flags);
> @@ -145,18 +145,18 @@ ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
> __ppc4xx_gpio_set(gc, gpio, val);
>
> /* Disable open-drain function */
> - clrbits32(®s->odr, GPIO_MASK(gpio));
> + clrbits32_be(®s->odr, GPIO_MASK(gpio));
>
> /* Drive the pin */
> - setbits32(®s->tcr, GPIO_MASK(gpio));
> + setbits32_be(®s->tcr, GPIO_MASK(gpio));
>
> /* Bits 0-15 use TSRL, bits 16-31 use TSRH */
> if (gpio < 16) {
> - clrbits32(®s->osrl, GPIO_MASK2(gpio));
> - clrbits32(®s->tsrl, GPIO_MASK2(gpio));
> + clrbits32_be(®s->osrl, GPIO_MASK2(gpio));
> + clrbits32_be(®s->tsrl, GPIO_MASK2(gpio));
> } else {
> - clrbits32(®s->osrh, GPIO_MASK2(gpio));
> - clrbits32(®s->tsrh, GPIO_MASK2(gpio));
> + clrbits32_be(®s->osrh, GPIO_MASK2(gpio));
> + clrbits32_be(®s->tsrh, GPIO_MASK2(gpio));
> }
>
> spin_unlock_irqrestore(&chip->lock, flags);
> diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
> index dc81f05e0bce..283925e49096 100644
> --- a/arch/powerpc/platforms/512x/pdm360ng.c
> +++ b/arch/powerpc/platforms/512x/pdm360ng.c
> @@ -38,7 +38,7 @@ static int pdm360ng_get_pendown_state(void)
>
> reg = in_be32(pdm360ng_gpio_base + 0xc);
> if (reg & 0x40)
> - setbits32(pdm360ng_gpio_base + 0xc, 0x40);
> + setbits32_be(pdm360ng_gpio_base + 0xc, 0x40);
>
> reg = in_be32(pdm360ng_gpio_base + 0x8);
>
> @@ -69,8 +69,8 @@ static int __init pdm360ng_penirq_init(void)
> return -ENODEV;
> }
> out_be32(pdm360ng_gpio_base + 0xc, 0xffffffff);
> - setbits32(pdm360ng_gpio_base + 0x18, 0x2000);
> - setbits32(pdm360ng_gpio_base + 0x10, 0x40);
> + setbits32_be(pdm360ng_gpio_base + 0x18, 0x2000);
> + setbits32_be(pdm360ng_gpio_base + 0x10, 0x40);
>
> return 0;
> }
> diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
> index 565e3a83dc9e..8a8b3d79798d 100644
> --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c
> +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
> @@ -314,13 +314,13 @@ int mpc5200_psc_ac97_gpio_reset(int psc_number)
>
> /* enable gpio pins for output */
> setbits8(&wkup_gpio->wkup_gpioe, reset);
> - setbits32(&simple_gpio->simple_gpioe, sync | out);
> + setbits32_be(&simple_gpio->simple_gpioe, sync | out);
>
> setbits8(&wkup_gpio->wkup_ddr, reset);
> - setbits32(&simple_gpio->simple_ddr, sync | out);
> + setbits32_be(&simple_gpio->simple_ddr, sync | out);
>
> /* Assert cold reset */
> - clrbits32(&simple_gpio->simple_dvo, sync | out);
> + clrbits32_be(&simple_gpio->simple_dvo, sync | out);
> clrbits8(&wkup_gpio->wkup_dvo, reset);
>
> /* wait for 1 us */
> diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
> index 17cf249b18ee..88eef86f802c 100644
> --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
> +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
> @@ -142,7 +142,7 @@ static void mpc52xx_gpt_irq_unmask(struct irq_data *d)
> unsigned long flags;
>
> raw_spin_lock_irqsave(&gpt->lock, flags);
> - setbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
> + setbits32_be(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
> raw_spin_unlock_irqrestore(&gpt->lock, flags);
> }
>
> @@ -152,7 +152,7 @@ static void mpc52xx_gpt_irq_mask(struct irq_data *d)
> unsigned long flags;
>
> raw_spin_lock_irqsave(&gpt->lock, flags);
> - clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
> + clrbits32_be(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
> raw_spin_unlock_irqrestore(&gpt->lock, flags);
> }
>
> @@ -308,7 +308,7 @@ static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
> dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio);
>
> raw_spin_lock_irqsave(&gpt->lock, flags);
> - clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK);
> + clrbits32_be(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK);
> raw_spin_unlock_irqrestore(&gpt->lock, flags);
>
> return 0;
> @@ -482,7 +482,7 @@ int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt)
> return -EBUSY;
> }
>
> - clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE);
> + clrbits32_be(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE);
> raw_spin_unlock_irqrestore(&gpt->lock, flags);
> return 0;
> }
> @@ -639,7 +639,7 @@ static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
> unsigned long flags;
>
> raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
> - clrbits32(&gpt_wdt->regs->mode,
> + clrbits32_be(&gpt_wdt->regs->mode,
> MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN);
> gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT;
> raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
> diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c
> index 8fec050f2d5b..da4fee98085f 100644
> --- a/arch/powerpc/platforms/82xx/ep8248e.c
> +++ b/arch/powerpc/platforms/82xx/ep8248e.c
> @@ -262,7 +262,7 @@ static void __init ep8248e_setup_arch(void)
> /* When this is set, snooping CPM DMA from RAM causes
> * machine checks. See erratum SIU18.
> */
> - clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP);
> + clrbits32_be(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP);
>
> ep8248e_bcsr_node =
> of_find_compatible_node(NULL, NULL, "fsl,ep8248e-bcsr");
> diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c
> index 28860e40b5db..b5b34f8b1a9b 100644
> --- a/arch/powerpc/platforms/82xx/km82xx.c
> +++ b/arch/powerpc/platforms/82xx/km82xx.c
> @@ -157,9 +157,9 @@ static void __init init_ioports(void)
> cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX);
>
> /* Force USB FULL SPEED bit to '1' */
> - setbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 10));
> + setbits32_be(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 10));
> /* clear USB_SLAVE */
> - clrbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 11));
> + clrbits32_be(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 11));
> }
>
> static void __init km82xx_setup_arch(void)
> @@ -172,7 +172,7 @@ static void __init km82xx_setup_arch(void)
> /* When this is set, snooping CPM DMA from RAM causes
> * machine checks. See erratum SIU18.
> */
> - clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP);
> + clrbits32_be(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP);
>
> init_ioports();
>
> diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c
> index d23c10a96bde..a9c8cd13a4b5 100644
> --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c
> +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c
> @@ -164,13 +164,13 @@ static void __init mpc8272_ads_setup_arch(void)
> #define BCSR3_FETHIEN2 0x10000000
> #define BCSR3_FETH2_RST 0x08000000
>
> - clrbits32(&bcsr[1], BCSR1_RS232_EN1 | BCSR1_RS232_EN2 | BCSR1_FETHIEN);
> - setbits32(&bcsr[1], BCSR1_FETH_RST);
> + clrbits32_be(&bcsr[1], BCSR1_RS232_EN1 | BCSR1_RS232_EN2 | BCSR1_FETHIEN);
> + setbits32_be(&bcsr[1], BCSR1_FETH_RST);
>
> - clrbits32(&bcsr[3], BCSR3_FETHIEN2);
> - setbits32(&bcsr[3], BCSR3_FETH2_RST);
> + clrbits32_be(&bcsr[3], BCSR3_FETHIEN2);
> + setbits32_be(&bcsr[3], BCSR3_FETH2_RST);
>
> - clrbits32(&bcsr[3], BCSR3_USB_nEN);
> + clrbits32_be(&bcsr[3], BCSR3_USB_nEN);
>
> iounmap(bcsr);
>
> diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c
> index c4f7029fc9ae..43a9a948f064 100644
> --- a/arch/powerpc/platforms/82xx/pq2.c
> +++ b/arch/powerpc/platforms/82xx/pq2.c
> @@ -25,7 +25,7 @@
> void __noreturn pq2_restart(char *cmd)
> {
> local_irq_disable();
> - setbits32(&cpm2_immr->im_clkrst.car_rmr, RMR_CSRE);
> + setbits32_be(&cpm2_immr->im_clkrst.car_rmr, RMR_CSRE);
>
> /* Clear the ME,EE,IR & DR bits in MSR to cause checkstop */
> mtmsr(mfmsr() & ~(MSR_ME | MSR_EE | MSR_IR | MSR_DR));
> diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
> index 8b065bdf7412..b691de4c580a 100644
> --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
> +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
> @@ -47,7 +47,7 @@ static void pq2ads_pci_mask_irq(struct irq_data *d)
> unsigned long flags;
> raw_spin_lock_irqsave(&pci_pic_lock, flags);
>
> - setbits32(&priv->regs->mask, 1 << irq);
> + setbits32_be(&priv->regs->mask, 1 << irq);
> mb();
>
> raw_spin_unlock_irqrestore(&pci_pic_lock, flags);
> @@ -63,7 +63,7 @@ static void pq2ads_pci_unmask_irq(struct irq_data *d)
> unsigned long flags;
>
> raw_spin_lock_irqsave(&pci_pic_lock, flags);
> - clrbits32(&priv->regs->mask, 1 << irq);
> + clrbits32_be(&priv->regs->mask, 1 << irq);
> raw_spin_unlock_irqrestore(&pci_pic_lock, flags);
> }
> }
> diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c
> index 6c654dc74a4b..05e9c743712f 100644
> --- a/arch/powerpc/platforms/82xx/pq2fads.c
> +++ b/arch/powerpc/platforms/82xx/pq2fads.c
> @@ -140,18 +140,18 @@ static void __init pq2fads_setup_arch(void)
>
> /* Enable the serial and ethernet ports */
>
> - clrbits32(&bcsr[1], BCSR1_RS232_EN1 | BCSR1_RS232_EN2 | BCSR1_FETHIEN);
> - setbits32(&bcsr[1], BCSR1_FETH_RST);
> + clrbits32_be(&bcsr[1], BCSR1_RS232_EN1 | BCSR1_RS232_EN2 | BCSR1_FETHIEN);
> + setbits32_be(&bcsr[1], BCSR1_FETH_RST);
>
> - clrbits32(&bcsr[3], BCSR3_FETHIEN2);
> - setbits32(&bcsr[3], BCSR3_FETH2_RST);
> + clrbits32_be(&bcsr[3], BCSR3_FETHIEN2);
> + setbits32_be(&bcsr[3], BCSR3_FETH2_RST);
>
> iounmap(bcsr);
>
> init_ioports();
>
> /* Enable external IRQs */
> - clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_siumcr, 0x0c000000);
> + clrbits32_be(&cpm2_immr->im_siu_conf.siu_82xx.sc_siumcr, 0x0c000000);
>
> pq2_init_pci();
>
> diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
> index d8642a4afc74..d13f11aac111 100644
> --- a/arch/powerpc/platforms/83xx/km83xx.c
> +++ b/arch/powerpc/platforms/83xx/km83xx.c
> @@ -101,19 +101,19 @@ static void quirk_mpc8360e_qe_enet10(void)
> * UCC1: write 0b11 to bits 18:19
> * at address IMMRBAR+0x14A8
> */
> - setbits32((base + 0xa8), 0x00003000);
> + setbits32_be((base + 0xa8), 0x00003000);
>
> /*
> * UCC2 option 1: write 0b11 to bits 4:5
> * at address IMMRBAR+0x14A8
> */
> - setbits32((base + 0xa8), 0x0c000000);
> + setbits32_be((base + 0xa8), 0x0c000000);
>
> /*
> * UCC2 option 2: write 0b11 to bits 16:17
> * at address IMMRBAR+0x14AC
> */
> - setbits32((base + 0xac), 0x0000c000);
> + setbits32_be((base + 0xac), 0x0000c000);
> }
> iounmap(base);
> of_node_put(np_par);
> diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
> index fd44dd03e1f3..56e638fdbbc5 100644
> --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
> +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
> @@ -118,7 +118,7 @@ static void __init mpc836x_mds_setup_arch(void)
> * IMMR + 0x14A8[4:5] = 11 (clk delay for UCC 2)
> * IMMR + 0x14A8[18:19] = 11 (clk delay for UCC 1)
> */
> - setbits32(immap, 0x0c003000);
> + setbits32_be(immap, 0x0c003000);
>
> /*
> * IMMR + 0x14AC[20:27] = 10101010
> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
> index d7e440e6dba3..06c18149dc5a 100644
> --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
> +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
> @@ -262,7 +262,7 @@ static void __init mpc85xx_mds_qe_init(void)
> * and QE12 for QE MII management signals in PMUXCR
> * register.
> */
> - setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
> + setbits32_be(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
> MPC85xx_PMUXCR_QE(3) |
> MPC85xx_PMUXCR_QE(9) |
> MPC85xx_PMUXCR_QE(12));
> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
> index f05325f0cc03..b1bb81a49a7f 100644
> --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
> +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
> @@ -60,9 +60,9 @@ static void mpc85xx_freeze_time_base(bool freeze)
>
> mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1;
> if (freeze)
> - setbits32(&guts->devdisr, mask);
> + setbits32_be(&guts->devdisr, mask);
> else
> - clrbits32(&guts->devdisr, mask);
> + clrbits32_be(&guts->devdisr, mask);
>
> in_be32(&guts->devdisr);
> }
> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
> index 10069503e39f..13ae0b12dd5a 100644
> --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
> +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
> @@ -115,7 +115,7 @@ static void __init mpc85xx_rdb_setup_arch(void)
> * and QE12 for QE MII management singals in PMUXCR
> * register.
> */
> - setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
> + setbits32_be(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
> MPC85xx_PMUXCR_QE(3) |
> MPC85xx_PMUXCR_QE(9) |
> MPC85xx_PMUXCR_QE(12));
> diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
> index 9fb57f78cdbe..adb7abdd291f 100644
> --- a/arch/powerpc/platforms/85xx/p1022_ds.c
> +++ b/arch/powerpc/platforms/85xx/p1022_ds.c
> @@ -405,11 +405,11 @@ void p1022ds_set_pixel_clock(unsigned int pixclock)
> pxclk = clamp_t(u32, pxclk, 2, 255);
>
> /* Disable the pixel clock, and set it to non-inverted and no delay */
> - clrbits32(&guts->clkdvdr,
> + clrbits32_be(&guts->clkdvdr,
> CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK);
>
> /* Enable the clock and set the pxclk */
> - setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
> + setbits32_be(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
>
> iounmap(guts);
> }
> diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
> index 276e00ab3dde..97698230f031 100644
> --- a/arch/powerpc/platforms/85xx/p1022_rdk.c
> +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
> @@ -75,11 +75,11 @@ void p1022rdk_set_pixel_clock(unsigned int pixclock)
> pxclk = clamp_t(u32, pxclk, 2, 255);
>
> /* Disable the pixel clock, and set it to non-inverted and no delay */
> - clrbits32(&guts->clkdvdr,
> + clrbits32_be(&guts->clkdvdr,
> CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK);
>
> /* Enable the clock and set the pxclk */
> - setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
> + setbits32_be(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
>
> iounmap(guts);
> }
> diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
> index dac36ba82fea..c11f95711a8a 100644
> --- a/arch/powerpc/platforms/85xx/t1042rdb_diu.c
> +++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
> @@ -114,11 +114,11 @@ static void t1042rdb_set_pixel_clock(unsigned int pixclock)
> pxclk = clamp_t(u32, pxclk, 2, 255);
>
> /* Disable the pixel clock, and set it to non-inverted and no delay */
> - clrbits32(scfg + CCSR_SCFG_PIXCLKCR,
> + clrbits32_be(scfg + CCSR_SCFG_PIXCLKCR,
> PIXCLKCR_PXCKEN | PIXCLKCR_PXCKDLY | PIXCLKCR_PXCLK_MASK);
>
> /* Enable the clock and set the pxclk */
> - setbits32(scfg + CCSR_SCFG_PIXCLKCR, PIXCLKCR_PXCKEN | (pxclk << 16));
> + setbits32_be(scfg + CCSR_SCFG_PIXCLKCR, PIXCLKCR_PXCKEN | (pxclk << 16));
>
> iounmap(scfg);
> }
> diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
> index 360f6253e9ff..b678ee2665d0 100644
> --- a/arch/powerpc/platforms/85xx/twr_p102x.c
> +++ b/arch/powerpc/platforms/85xx/twr_p102x.c
> @@ -95,7 +95,7 @@ static void __init twr_p1025_setup_arch(void)
> * and QE12 for QE MII management signals in PMUXCR
> * register.
> * Set QE mux bits in PMUXCR */
> - setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
> + setbits32_be(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
> MPC85xx_PMUXCR_QE(3) |
> MPC85xx_PMUXCR_QE(9) |
> MPC85xx_PMUXCR_QE(12));
> diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
> index a5d73fabe4d1..78472179b05a 100644
> --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
> +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
> @@ -261,11 +261,11 @@ void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
> pxclk = clamp_t(u32, pxclk, 2, 31);
>
> /* Disable the pixel clock, and set it to non-inverted and no delay */
> - clrbits32(&guts->clkdvdr,
> + clrbits32_be(&guts->clkdvdr,
> CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK);
>
> /* Enable the clock and set the pxclk */
> - setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
> + setbits32_be(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
>
> iounmap(guts);
> }
> diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c
> index bcef9f66191e..d21d0b8fd2a7 100644
> --- a/arch/powerpc/platforms/8xx/adder875.c
> +++ b/arch/powerpc/platforms/8xx/adder875.c
> @@ -77,7 +77,7 @@ static void __init init_ioports(void)
> cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
>
> /* Set FEC1 and FEC2 to MII mode */
> - clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
> + clrbits32_be(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
> }
>
> static void __init adder875_setup(void)
> diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
> index 027c42d8966c..2ed24abd0b40 100644
> --- a/arch/powerpc/platforms/8xx/m8xx_setup.c
> +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
> @@ -103,7 +103,7 @@ void __init mpc8xx_calibrate_decr(void)
>
> /* Force all 8xx processors to use divide by 16 processor clock. */
> clk_r2 = immr_map(im_clkrst);
> - setbits32(&clk_r2->car_sccr, 0x02000000);
> + setbits32_be(&clk_r2->car_sccr, 0x02000000);
> immr_unmap(clk_r2);
>
> /* Processor frequency is MHz.
> @@ -203,7 +203,7 @@ void __noreturn mpc8xx_restart(char *cmd)
>
> local_irq_disable();
>
> - setbits32(&clk_r->car_plprcr, 0x00000080);
> + setbits32_be(&clk_r->car_plprcr, 0x00000080);
> /* Clear the ME bit in MSR to cause checkstop on machine check
> */
> mtmsr(mfmsr() & ~0x1000);
> diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
> index 8d02f5ff4481..a25e5ab15d65 100644
> --- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c
> +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
> @@ -87,7 +87,7 @@ static void __init init_ioports(void)
> cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_RX);
>
> /* Set FEC1 and FEC2 to MII mode */
> - clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
> + clrbits32_be(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
> }
>
> static void __init mpc86xads_setup_arch(void)
> @@ -112,7 +112,7 @@ static void __init mpc86xads_setup_arch(void)
> return;
> }
>
> - clrbits32(bcsr_io, BCSR1_RS232EN_1 | BCSR1_RS232EN_2 | BCSR1_ETHEN);
> + clrbits32_be(bcsr_io, BCSR1_RS232EN_1 | BCSR1_RS232EN_2 | BCSR1_ETHEN);
> iounmap(bcsr_io);
> }
>
> diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
> index a0c83c1905c6..8aad0fb9090b 100644
> --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
> +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
> @@ -123,7 +123,7 @@ static void __init init_ioports(void)
> cpm1_clk_setup(CPM_CLK_SCC3, CPM_CLK6, CPM_CLK_RX);
>
> /* Set FEC1 and FEC2 to MII mode */
> - clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
> + clrbits32_be(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
> }
>
> static void __init mpc885ads_setup_arch(void)
> @@ -148,33 +148,33 @@ static void __init mpc885ads_setup_arch(void)
> return;
> }
>
> - clrbits32(&bcsr[1], BCSR1_RS232EN_1);
> + clrbits32_be(&bcsr[1], BCSR1_RS232EN_1);
> #ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
> - setbits32(&bcsr[1], BCSR1_RS232EN_2);
> + setbits32_be(&bcsr[1], BCSR1_RS232EN_2);
> #else
> - clrbits32(&bcsr[1], BCSR1_RS232EN_2);
> + clrbits32_be(&bcsr[1], BCSR1_RS232EN_2);
> #endif
>
> - clrbits32(bcsr5, BCSR5_MII1_EN);
> - setbits32(bcsr5, BCSR5_MII1_RST);
> + clrbits32_be(bcsr5, BCSR5_MII1_EN);
> + setbits32_be(bcsr5, BCSR5_MII1_RST);
> udelay(1000);
> - clrbits32(bcsr5, BCSR5_MII1_RST);
> + clrbits32_be(bcsr5, BCSR5_MII1_RST);
>
> #ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
> - clrbits32(bcsr5, BCSR5_MII2_EN);
> - setbits32(bcsr5, BCSR5_MII2_RST);
> + clrbits32_be(bcsr5, BCSR5_MII2_EN);
> + setbits32_be(bcsr5, BCSR5_MII2_RST);
> udelay(1000);
> - clrbits32(bcsr5, BCSR5_MII2_RST);
> + clrbits32_be(bcsr5, BCSR5_MII2_RST);
> #else
> - setbits32(bcsr5, BCSR5_MII2_EN);
> + setbits32_be(bcsr5, BCSR5_MII2_EN);
> #endif
>
> #ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3
> - clrbits32(&bcsr[4], BCSR4_ETH10_RST);
> + clrbits32_be(&bcsr[4], BCSR4_ETH10_RST);
> udelay(1000);
> - setbits32(&bcsr[4], BCSR4_ETH10_RST);
> + setbits32_be(&bcsr[4], BCSR4_ETH10_RST);
>
> - setbits32(&bcsr[1], BCSR1_ETHEN);
> + setbits32_be(&bcsr[1], BCSR1_ETHEN);
>
> np = of_find_node_by_path("/soc@ff000000/cpm@9c0/serial@a80");
> #else
> diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
> index db0be007fd06..6df4533aa851 100644
> --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
> +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
> @@ -53,7 +53,7 @@ static void flipper_pic_mask_and_ack(struct irq_data *d)
> void __iomem *io_base = irq_data_get_irq_chip_data(d);
> u32 mask = 1 << irq;
>
> - clrbits32(io_base + FLIPPER_IMR, mask);
> + clrbits32_be(io_base + FLIPPER_IMR, mask);
> /* this is at least needed for RSW */
> out_be32(io_base + FLIPPER_ICR, mask);
> }
> @@ -72,7 +72,7 @@ static void flipper_pic_mask(struct irq_data *d)
> int irq = irqd_to_hwirq(d);
> void __iomem *io_base = irq_data_get_irq_chip_data(d);
>
> - clrbits32(io_base + FLIPPER_IMR, 1 << irq);
> + clrbits32_be(io_base + FLIPPER_IMR, 1 << irq);
> }
>
> static void flipper_pic_unmask(struct irq_data *d)
> @@ -80,7 +80,7 @@ static void flipper_pic_unmask(struct irq_data *d)
> int irq = irqd_to_hwirq(d);
> void __iomem *io_base = irq_data_get_irq_chip_data(d);
>
> - setbits32(io_base + FLIPPER_IMR, 1 << irq);
> + setbits32_be(io_base + FLIPPER_IMR, 1 << irq);
> }
>
>
> diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
> index 8112b39879d6..5487710bed1c 100644
> --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
> +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
> @@ -50,7 +50,7 @@ static void hlwd_pic_mask_and_ack(struct irq_data *d)
> void __iomem *io_base = irq_data_get_irq_chip_data(d);
> u32 mask = 1 << irq;
>
> - clrbits32(io_base + HW_BROADWAY_IMR, mask);
> + clrbits32_be(io_base + HW_BROADWAY_IMR, mask);
> out_be32(io_base + HW_BROADWAY_ICR, mask);
> }
>
> @@ -67,7 +67,7 @@ static void hlwd_pic_mask(struct irq_data *d)
> int irq = irqd_to_hwirq(d);
> void __iomem *io_base = irq_data_get_irq_chip_data(d);
>
> - clrbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
> + clrbits32_be(io_base + HW_BROADWAY_IMR, 1 << irq);
> }
>
> static void hlwd_pic_unmask(struct irq_data *d)
> @@ -75,10 +75,10 @@ static void hlwd_pic_unmask(struct irq_data *d)
> int irq = irqd_to_hwirq(d);
> void __iomem *io_base = irq_data_get_irq_chip_data(d);
>
> - setbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
> + setbits32_be(io_base + HW_BROADWAY_IMR, 1 << irq);
>
> /* Make sure the ARM (aka. Starlet) doesn't handle this interrupt. */
> - clrbits32(io_base + HW_STARLET_IMR, 1 << irq);
> + clrbits32_be(io_base + HW_STARLET_IMR, 1 << irq);
> }
>
>
> diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
> index 403523c061ba..dd511e19147a 100644
> --- a/arch/powerpc/platforms/embedded6xx/wii.c
> +++ b/arch/powerpc/platforms/embedded6xx/wii.c
> @@ -134,7 +134,7 @@ static void __init wii_setup_arch(void)
> hw_gpio = wii_ioremap_hw_regs("hw_gpio", HW_GPIO_COMPATIBLE);
> if (hw_gpio) {
> /* turn off the front blue led and IR light */
> - clrbits32(hw_gpio + HW_GPIO_OUT(0),
> + clrbits32_be(hw_gpio + HW_GPIO_OUT(0),
> HW_GPIO_SLOT_LED | HW_GPIO_SENSOR_BAR);
> }
> }
> @@ -145,7 +145,7 @@ static void __noreturn wii_restart(char *cmd)
>
> if (hw_ctrl) {
> /* clear the system reset pin to cause a reset */
> - clrbits32(hw_ctrl + HW_CTRL_RESETS, HW_CTRL_RESETS_SYS);
> + clrbits32_be(hw_ctrl + HW_CTRL_RESETS, HW_CTRL_RESETS_SYS);
> }
> wii_spin();
> }
> @@ -159,13 +159,13 @@ static void wii_power_off(void)
> * set the owner of the shutdown pin to ARM, because it is
> * accessed through the registers for the ARM, below
> */
> - clrbits32(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN);
> + clrbits32_be(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN);
>
> /* make sure that the poweroff GPIO is configured as output */
> - setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN);
> + setbits32_be(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN);
>
> /* drive the poweroff GPIO high */
> - setbits32(hw_gpio + HW_GPIO_OUT(1), HW_GPIO_SHUTDOWN);
> + setbits32_be(hw_gpio + HW_GPIO_OUT(1), HW_GPIO_SHUTDOWN);
> }
> wii_spin();
> }
> diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
> index 4f8dcf124828..9de5f13c51cb 100644
> --- a/arch/powerpc/sysdev/cpm1.c
> +++ b/arch/powerpc/sysdev/cpm1.c
> @@ -60,14 +60,14 @@ static void cpm_mask_irq(struct irq_data *d)
> {
> unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
>
> - clrbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
> + clrbits32_be(&cpic_reg->cpic_cimr, (1 << cpm_vec));
> }
>
> static void cpm_unmask_irq(struct irq_data *d)
> {
> unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
>
> - setbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
> + setbits32_be(&cpic_reg->cpic_cimr, (1 << cpm_vec));
> }
>
> static void cpm_end_irq(struct irq_data *d)
> @@ -188,7 +188,7 @@ unsigned int cpm_pic_init(void)
> if (setup_irq(eirq, &cpm_error_irqaction))
> printk(KERN_ERR "Could not allocate CPM error IRQ!");
>
> - setbits32(&cpic_reg->cpic_cicr, CICR_IEN);
> + setbits32_be(&cpic_reg->cpic_cicr, CICR_IEN);
>
> end:
> of_node_put(np);
> @@ -317,14 +317,14 @@ static void cpm1_set_pin32(int port, int pin, int flags)
> &mpc8xx_immr->im_cpm.cp_pedir;
>
> if (flags & CPM_PIN_OUTPUT)
> - setbits32(&iop->dir, pin);
> + setbits32_be(&iop->dir, pin);
> else
> - clrbits32(&iop->dir, pin);
> + clrbits32_be(&iop->dir, pin);
>
> if (!(flags & CPM_PIN_GPIO))
> - setbits32(&iop->par, pin);
> + setbits32_be(&iop->par, pin);
> else
> - clrbits32(&iop->par, pin);
> + clrbits32_be(&iop->par, pin);
>
> if (port == CPM_PORTB) {
> if (flags & CPM_PIN_OPENDRAIN)
> @@ -335,14 +335,14 @@ static void cpm1_set_pin32(int port, int pin, int flags)
>
> if (port == CPM_PORTE) {
> if (flags & CPM_PIN_SECONDARY)
> - setbits32(&iop->sor, pin);
> + setbits32_be(&iop->sor, pin);
> else
> - clrbits32(&iop->sor, pin);
> + clrbits32_be(&iop->sor, pin);
>
> if (flags & CPM_PIN_OPENDRAIN)
> - setbits32(&mpc8xx_immr->im_cpm.cp_peodr, pin);
> + setbits32_be(&mpc8xx_immr->im_cpm.cp_peodr, pin);
> else
> - clrbits32(&mpc8xx_immr->im_cpm.cp_peodr, pin);
> + clrbits32_be(&mpc8xx_immr->im_cpm.cp_peodr, pin);
> }
> }
>
> @@ -732,7 +732,7 @@ static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
>
> spin_lock_irqsave(&cpm1_gc->lock, flags);
>
> - setbits32(&iop->dir, pin_mask);
> + setbits32_be(&iop->dir, pin_mask);
> __cpm1_gpio32_set(mm_gc, pin_mask, val);
>
> spin_unlock_irqrestore(&cpm1_gc->lock, flags);
> @@ -750,7 +750,7 @@ static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
>
> spin_lock_irqsave(&cpm1_gc->lock, flags);
>
> - clrbits32(&iop->dir, pin_mask);
> + clrbits32_be(&iop->dir, pin_mask);
>
> spin_unlock_irqrestore(&cpm1_gc->lock, flags);
>
> diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c
> index 07718b9a2c99..445d6e45a6de 100644
> --- a/arch/powerpc/sysdev/cpm2.c
> +++ b/arch/powerpc/sysdev/cpm2.c
> @@ -335,22 +335,22 @@ void cpm2_set_pin(int port, int pin, int flags)
> pin = 1 << (31 - pin);
>
> if (flags & CPM_PIN_OUTPUT)
> - setbits32(&iop[port].dir, pin);
> + setbits32_be(&iop[port].dir, pin);
> else
> - clrbits32(&iop[port].dir, pin);
> + clrbits32_be(&iop[port].dir, pin);
>
> if (!(flags & CPM_PIN_GPIO))
> - setbits32(&iop[port].par, pin);
> + setbits32_be(&iop[port].par, pin);
> else
> - clrbits32(&iop[port].par, pin);
> + clrbits32_be(&iop[port].par, pin);
>
> if (flags & CPM_PIN_SECONDARY)
> - setbits32(&iop[port].sor, pin);
> + setbits32_be(&iop[port].sor, pin);
> else
> - clrbits32(&iop[port].sor, pin);
> + clrbits32_be(&iop[port].sor, pin);
>
> if (flags & CPM_PIN_OPENDRAIN)
> - setbits32(&iop[port].odr, pin);
> + setbits32_be(&iop[port].odr, pin);
> else
> - clrbits32(&iop[port].odr, pin);
> + clrbits32_be(&iop[port].odr, pin);
> }
> diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
> index b74508175b67..d36a95708aaf 100644
> --- a/arch/powerpc/sysdev/cpm_common.c
> +++ b/arch/powerpc/sysdev/cpm_common.c
> @@ -165,7 +165,7 @@ static int cpm2_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
>
> spin_lock_irqsave(&cpm2_gc->lock, flags);
>
> - setbits32(&iop->dir, pin_mask);
> + setbits32_be(&iop->dir, pin_mask);
> __cpm2_gpio32_set(mm_gc, pin_mask, val);
>
> spin_unlock_irqrestore(&cpm2_gc->lock, flags);
> @@ -183,7 +183,7 @@ static int cpm2_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
>
> spin_lock_irqsave(&cpm2_gc->lock, flags);
>
> - clrbits32(&iop->dir, pin_mask);
> + clrbits32_be(&iop->dir, pin_mask);
>
> spin_unlock_irqrestore(&cpm2_gc->lock, flags);
>
> diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c
> index c27058e5df26..2b7e2b4a2543 100644
> --- a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c
> +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c
> @@ -124,23 +124,23 @@ static int mpc85xx_l2ctlr_of_probe(struct platform_device *dev)
>
> switch (ways) {
> case LOCK_WAYS_EIGHTH:
> - setbits32(&l2ctlr->ctl,
> + setbits32_be(&l2ctlr->ctl,
> L2CR_L2E | L2CR_L2FI | L2CR_SRAM_EIGHTH);
> break;
>
> case LOCK_WAYS_TWO_EIGHTH:
> - setbits32(&l2ctlr->ctl,
> + setbits32_be(&l2ctlr->ctl,
> L2CR_L2E | L2CR_L2FI | L2CR_SRAM_QUART);
> break;
>
> case LOCK_WAYS_HALF:
> - setbits32(&l2ctlr->ctl,
> + setbits32_be(&l2ctlr->ctl,
> L2CR_L2E | L2CR_L2FI | L2CR_SRAM_HALF);
> break;
>
> case LOCK_WAYS_FULL:
> default:
> - setbits32(&l2ctlr->ctl,
> + setbits32_be(&l2ctlr->ctl,
> L2CR_L2E | L2CR_L2FI | L2CR_SRAM_FULL);
> break;
> }
> diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c
> index 5340a483cf55..994233e41b91 100644
> --- a/arch/powerpc/sysdev/fsl_lbc.c
> +++ b/arch/powerpc/sysdev/fsl_lbc.c
> @@ -192,7 +192,7 @@ static int fsl_lbc_ctrl_init(struct fsl_lbc_ctrl *ctrl,
> struct fsl_lbc_regs __iomem *lbc = ctrl->regs;
>
> /* clear event registers */
> - setbits32(&lbc->ltesr, LTESR_CLEAR);
> + setbits32_be(&lbc->ltesr, LTESR_CLEAR);
> out_be32(&lbc->lteatr, 0);
> out_be32(&lbc->ltear, 0);
> out_be32(&lbc->lteccr, LTECCR_CLEAR);
> diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
> index 918be816b097..17aa5ee63d34 100644
> --- a/arch/powerpc/sysdev/fsl_pci.c
> +++ b/arch/powerpc/sysdev/fsl_pci.c
> @@ -1196,11 +1196,11 @@ static int fsl_pci_pme_probe(struct pci_controller *hose)
> pci = hose->private_data;
>
> /* Enable PTOD, ENL23D & EXL23D */
> - clrbits32(&pci->pex_pme_mes_disr,
> + clrbits32_be(&pci->pex_pme_mes_disr,
> PME_DISR_EN_PTOD | PME_DISR_EN_ENL23D | PME_DISR_EN_EXL23D);
>
> out_be32(&pci->pex_pme_mes_ier, 0);
> - setbits32(&pci->pex_pme_mes_ier,
> + setbits32_be(&pci->pex_pme_mes_ier,
> PME_DISR_EN_PTOD | PME_DISR_EN_ENL23D | PME_DISR_EN_EXL23D);
>
> /* PME Enable */
> @@ -1218,7 +1218,7 @@ static void send_pme_turnoff_message(struct pci_controller *hose)
> int i;
>
> /* Send PME_Turn_Off Message Request */
> - setbits32(&pci->pex_pmcr, PEX_PMCR_PTOMR);
> + setbits32_be(&pci->pex_pmcr, PEX_PMCR_PTOMR);
>
> /* Wait trun off done */
> for (i = 0; i < 150; i++) {
> @@ -1254,7 +1254,7 @@ static void fsl_pci_syscore_do_resume(struct pci_controller *hose)
> int i;
>
> /* Send Exit L2 State Message */
> - setbits32(&pci->pex_pmcr, PEX_PMCR_EXL2S);
> + setbits32_be(&pci->pex_pmcr, PEX_PMCR_EXL2S);
>
> /* Wait exit done */
> for (i = 0; i < 150; i++) {
> diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c
> index 232225e7f863..bbcf4cb89bb6 100644
> --- a/arch/powerpc/sysdev/fsl_pmc.c
> +++ b/arch/powerpc/sysdev/fsl_pmc.c
> @@ -37,7 +37,7 @@ static int pmc_suspend_enter(suspend_state_t state)
> {
> int ret;
>
> - setbits32(&pmc_regs->pmcsr, PMCSR_SLP);
> + setbits32_be(&pmc_regs->pmcsr, PMCSR_SLP);
> /* At this point, the CPU is asleep. */
>
> /* Upon resume, wait for SLP bit to be clear. */
> diff --git a/arch/powerpc/sysdev/fsl_rcpm.c b/arch/powerpc/sysdev/fsl_rcpm.c
> index 9259a94f70e1..bd2a7606bfce 100644
> --- a/arch/powerpc/sysdev/fsl_rcpm.c
> +++ b/arch/powerpc/sysdev/fsl_rcpm.c
> @@ -33,10 +33,10 @@ static void rcpm_v1_irq_mask(int cpu)
> int hw_cpu = get_hard_smp_processor_id(cpu);
> unsigned int mask = 1 << hw_cpu;
>
> - setbits32(&rcpm_v1_regs->cpmimr, mask);
> - setbits32(&rcpm_v1_regs->cpmcimr, mask);
> - setbits32(&rcpm_v1_regs->cpmmcmr, mask);
> - setbits32(&rcpm_v1_regs->cpmnmimr, mask);
> + setbits32_be(&rcpm_v1_regs->cpmimr, mask);
> + setbits32_be(&rcpm_v1_regs->cpmcimr, mask);
> + setbits32_be(&rcpm_v1_regs->cpmmcmr, mask);
> + setbits32_be(&rcpm_v1_regs->cpmnmimr, mask);
> }
>
> static void rcpm_v2_irq_mask(int cpu)
> @@ -44,10 +44,10 @@ static void rcpm_v2_irq_mask(int cpu)
> int hw_cpu = get_hard_smp_processor_id(cpu);
> unsigned int mask = 1 << hw_cpu;
>
> - setbits32(&rcpm_v2_regs->tpmimr0, mask);
> - setbits32(&rcpm_v2_regs->tpmcimr0, mask);
> - setbits32(&rcpm_v2_regs->tpmmcmr0, mask);
> - setbits32(&rcpm_v2_regs->tpmnmimr0, mask);
> + setbits32_be(&rcpm_v2_regs->tpmimr0, mask);
> + setbits32_be(&rcpm_v2_regs->tpmcimr0, mask);
> + setbits32_be(&rcpm_v2_regs->tpmmcmr0, mask);
> + setbits32_be(&rcpm_v2_regs->tpmnmimr0, mask);
> }
>
> static void rcpm_v1_irq_unmask(int cpu)
> @@ -55,10 +55,10 @@ static void rcpm_v1_irq_unmask(int cpu)
> int hw_cpu = get_hard_smp_processor_id(cpu);
> unsigned int mask = 1 << hw_cpu;
>
> - clrbits32(&rcpm_v1_regs->cpmimr, mask);
> - clrbits32(&rcpm_v1_regs->cpmcimr, mask);
> - clrbits32(&rcpm_v1_regs->cpmmcmr, mask);
> - clrbits32(&rcpm_v1_regs->cpmnmimr, mask);
> + clrbits32_be(&rcpm_v1_regs->cpmimr, mask);
> + clrbits32_be(&rcpm_v1_regs->cpmcimr, mask);
> + clrbits32_be(&rcpm_v1_regs->cpmmcmr, mask);
> + clrbits32_be(&rcpm_v1_regs->cpmnmimr, mask);
> }
>
> static void rcpm_v2_irq_unmask(int cpu)
> @@ -66,26 +66,26 @@ static void rcpm_v2_irq_unmask(int cpu)
> int hw_cpu = get_hard_smp_processor_id(cpu);
> unsigned int mask = 1 << hw_cpu;
>
> - clrbits32(&rcpm_v2_regs->tpmimr0, mask);
> - clrbits32(&rcpm_v2_regs->tpmcimr0, mask);
> - clrbits32(&rcpm_v2_regs->tpmmcmr0, mask);
> - clrbits32(&rcpm_v2_regs->tpmnmimr0, mask);
> + clrbits32_be(&rcpm_v2_regs->tpmimr0, mask);
> + clrbits32_be(&rcpm_v2_regs->tpmcimr0, mask);
> + clrbits32_be(&rcpm_v2_regs->tpmmcmr0, mask);
> + clrbits32_be(&rcpm_v2_regs->tpmnmimr0, mask);
> }
>
> static void rcpm_v1_set_ip_power(bool enable, u32 mask)
> {
> if (enable)
> - setbits32(&rcpm_v1_regs->ippdexpcr, mask);
> + setbits32_be(&rcpm_v1_regs->ippdexpcr, mask);
> else
> - clrbits32(&rcpm_v1_regs->ippdexpcr, mask);
> + clrbits32_be(&rcpm_v1_regs->ippdexpcr, mask);
> }
>
> static void rcpm_v2_set_ip_power(bool enable, u32 mask)
> {
> if (enable)
> - setbits32(&rcpm_v2_regs->ippdexpcr[0], mask);
> + setbits32_be(&rcpm_v2_regs->ippdexpcr[0], mask);
> else
> - clrbits32(&rcpm_v2_regs->ippdexpcr[0], mask);
> + clrbits32_be(&rcpm_v2_regs->ippdexpcr[0], mask);
> }
>
> static void rcpm_v1_cpu_enter_state(int cpu, int state)
> @@ -95,10 +95,10 @@ static void rcpm_v1_cpu_enter_state(int cpu, int state)
>
> switch (state) {
> case E500_PM_PH10:
> - setbits32(&rcpm_v1_regs->cdozcr, mask);
> + setbits32_be(&rcpm_v1_regs->cdozcr, mask);
> break;
> case E500_PM_PH15:
> - setbits32(&rcpm_v1_regs->cnapcr, mask);
> + setbits32_be(&rcpm_v1_regs->cnapcr, mask);
> break;
> default:
> pr_warn("Unknown cpu PM state (%d)\n", state);
> @@ -114,16 +114,16 @@ static void rcpm_v2_cpu_enter_state(int cpu, int state)
> switch (state) {
> case E500_PM_PH10:
> /* one bit corresponds to one thread for PH10 of 6500 */
> - setbits32(&rcpm_v2_regs->tph10setr0, 1 << hw_cpu);
> + setbits32_be(&rcpm_v2_regs->tph10setr0, 1 << hw_cpu);
> break;
> case E500_PM_PH15:
> - setbits32(&rcpm_v2_regs->pcph15setr, mask);
> + setbits32_be(&rcpm_v2_regs->pcph15setr, mask);
> break;
> case E500_PM_PH20:
> - setbits32(&rcpm_v2_regs->pcph20setr, mask);
> + setbits32_be(&rcpm_v2_regs->pcph20setr, mask);
> break;
> case E500_PM_PH30:
> - setbits32(&rcpm_v2_regs->pcph30setr, mask);
> + setbits32_be(&rcpm_v2_regs->pcph30setr, mask);
> break;
> default:
> pr_warn("Unknown cpu PM state (%d)\n", state);
> @@ -172,10 +172,10 @@ static void rcpm_v1_cpu_exit_state(int cpu, int state)
>
> switch (state) {
> case E500_PM_PH10:
> - clrbits32(&rcpm_v1_regs->cdozcr, mask);
> + clrbits32_be(&rcpm_v1_regs->cdozcr, mask);
> break;
> case E500_PM_PH15:
> - clrbits32(&rcpm_v1_regs->cnapcr, mask);
> + clrbits32_be(&rcpm_v1_regs->cnapcr, mask);
> break;
> default:
> pr_warn("Unknown cpu PM state (%d)\n", state);
> @@ -196,16 +196,16 @@ static void rcpm_v2_cpu_exit_state(int cpu, int state)
>
> switch (state) {
> case E500_PM_PH10:
> - setbits32(&rcpm_v2_regs->tph10clrr0, 1 << hw_cpu);
> + setbits32_be(&rcpm_v2_regs->tph10clrr0, 1 << hw_cpu);
> break;
> case E500_PM_PH15:
> - setbits32(&rcpm_v2_regs->pcph15clrr, mask);
> + setbits32_be(&rcpm_v2_regs->pcph15clrr, mask);
> break;
> case E500_PM_PH20:
> - setbits32(&rcpm_v2_regs->pcph20clrr, mask);
> + setbits32_be(&rcpm_v2_regs->pcph20clrr, mask);
> break;
> case E500_PM_PH30:
> - setbits32(&rcpm_v2_regs->pcph30clrr, mask);
> + setbits32_be(&rcpm_v2_regs->pcph30clrr, mask);
> break;
> default:
> pr_warn("Unknown cpu PM state (%d)\n", state);
> @@ -226,7 +226,7 @@ static int rcpm_v1_plat_enter_state(int state)
>
> switch (state) {
> case PLAT_PM_SLEEP:
> - setbits32(pmcsr_reg, RCPM_POWMGTCSR_SLP);
> + setbits32_be(pmcsr_reg, RCPM_POWMGTCSR_SLP);
>
> /* Upon resume, wait for RCPM_POWMGTCSR_SLP bit to be clear. */
> result = spin_event_timeout(
> @@ -253,9 +253,9 @@ static int rcpm_v2_plat_enter_state(int state)
> switch (state) {
> case PLAT_PM_LPM20:
> /* clear previous LPM20 status */
> - setbits32(pmcsr_reg, RCPM_POWMGTCSR_P_LPM20_ST);
> + setbits32_be(pmcsr_reg, RCPM_POWMGTCSR_P_LPM20_ST);
> /* enter LPM20 status */
> - setbits32(pmcsr_reg, RCPM_POWMGTCSR_LPM20_RQ);
> + setbits32_be(pmcsr_reg, RCPM_POWMGTCSR_LPM20_RQ);
>
> /* At this point, the device is in LPM20 status. */
>
> @@ -291,9 +291,9 @@ static void rcpm_common_freeze_time_base(u32 *tben_reg, int freeze)
>
> if (freeze) {
> mask = in_be32(tben_reg);
> - clrbits32(tben_reg, mask);
> + clrbits32_be(tben_reg, mask);
> } else {
> - setbits32(tben_reg, mask);
> + setbits32_be(tben_reg, mask);
> }
>
> /* read back to push the previous write */
> diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
> index 5011ffea4e4b..278e63cc8afe 100644
> --- a/arch/powerpc/sysdev/fsl_rio.c
> +++ b/arch/powerpc/sysdev/fsl_rio.c
> @@ -668,10 +668,10 @@ int fsl_rio_setup(struct platform_device *dev)
> out_be32(priv->regs_win
> + RIO_CCSR + i*0x20, 0);
> /* Set 1x lane */
> - setbits32(priv->regs_win
> + setbits32_be(priv->regs_win
> + RIO_CCSR + i*0x20, 0x02000000);
> /* Enable ports */
> - setbits32(priv->regs_win
> + setbits32_be(priv->regs_win
> + RIO_CCSR + i*0x20, 0x00600000);
> msleep(100);
> if (in_be32((priv->regs_win
> diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
> index 88b35a3dcdc5..134ba53f0fcb 100644
> --- a/arch/powerpc/sysdev/fsl_rmu.c
> +++ b/arch/powerpc/sysdev/fsl_rmu.c
> @@ -355,7 +355,7 @@ fsl_rio_dbell_handler(int irq, void *dev_instance)
> dmsg->sid, dmsg->tid,
> dmsg->info);
> }
> - setbits32(&fsl_dbell->dbell_regs->dmr, DOORBELL_DMR_DI);
> + setbits32_be(&fsl_dbell->dbell_regs->dmr, DOORBELL_DMR_DI);
> out_be32(&fsl_dbell->dbell_regs->dsr, DOORBELL_DSR_DIQI);
> }
>
> @@ -909,10 +909,10 @@ fsl_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries)
> out_be32(&rmu->msg_regs->imr, 0x001b0060);
>
> /* Set number of queue entries */
> - setbits32(&rmu->msg_regs->imr, (get_bitmask_order(entries) - 2) << 12);
> + setbits32_be(&rmu->msg_regs->imr, (get_bitmask_order(entries) - 2) << 12);
>
> /* Now enable the unit */
> - setbits32(&rmu->msg_regs->imr, 0x1);
> + setbits32_be(&rmu->msg_regs->imr, 0x1);
>
> out:
> return rc;
> @@ -1015,7 +1015,7 @@ void *fsl_get_inb_message(struct rio_mport *mport, int mbox)
> rmu->msg_rx_ring.virt_buffer[buf_idx] = NULL;
>
> out1:
> - setbits32(&rmu->msg_regs->imr, RIO_MSG_IMR_MI);
> + setbits32_be(&rmu->msg_regs->imr, RIO_MSG_IMR_MI);
>
> out2:
> return buf;
> diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
> index 87e7c42777a8..70b02ba90220 100644
> --- a/arch/powerpc/sysdev/mpic_timer.c
> +++ b/arch/powerpc/sysdev/mpic_timer.c
> @@ -154,7 +154,7 @@ static int set_cascade_timer(struct timer_group_priv *priv, u64 ticks,
>
> tcr = casc_priv->tcr_value |
> (casc_priv->tcr_value << MPIC_TIMER_TCR_ROVR_OFFSET);
> - setbits32(priv->group_tcr, tcr);
> + setbits32_be(priv->group_tcr, tcr);
>
> tmp_ticks = div_u64_rem(ticks, MAX_TICKS_CASCADE, &rem_ticks);
>
> @@ -253,7 +253,7 @@ void mpic_start_timer(struct mpic_timer *handle)
> struct timer_group_priv *priv = container_of(handle,
> struct timer_group_priv, timer[handle->num]);
>
> - clrbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP);
> + clrbits32_be(&priv->regs[handle->num].gtbcr, TIMER_STOP);
> }
> EXPORT_SYMBOL(mpic_start_timer);
>
> @@ -269,7 +269,7 @@ void mpic_stop_timer(struct mpic_timer *handle)
> struct timer_group_priv, timer[handle->num]);
> struct cascade_priv *casc_priv;
>
> - setbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP);
> + setbits32_be(&priv->regs[handle->num].gtbcr, TIMER_STOP);
>
> casc_priv = priv->timer[handle->num].cascade_handle;
> if (casc_priv) {
> @@ -340,7 +340,7 @@ void mpic_free_timer(struct mpic_timer *handle)
> u32 tcr;
> tcr = casc_priv->tcr_value | (casc_priv->tcr_value <<
> MPIC_TIMER_TCR_ROVR_OFFSET);
> - clrbits32(priv->group_tcr, tcr);
> + clrbits32_be(priv->group_tcr, tcr);
> priv->idle |= casc_priv->cascade_map;
> priv->timer[handle->num].cascade_handle = NULL;
> } else {
> @@ -508,7 +508,7 @@ static void timer_group_init(struct device_node *np)
>
> /* Init FSL timer hardware */
> if (priv->flags & FSL_GLOBAL_TIMER)
> - setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);
> + setbits32_be(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);
>
> list_add_tail(&priv->node, &timer_group_list);
>
> @@ -531,7 +531,7 @@ static void mpic_timer_resume(void)
> list_for_each_entry(priv, &timer_group_list, node) {
> /* Init FSL timer hardware */
> if (priv->flags & FSL_GLOBAL_TIMER)
> - setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);
> + setbits32_be(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);
> }
> }
>
>
^ permalink raw reply
* Re: [PATCH net-next 11/11] vhost_net: batch submitting XDP buffers to underlayer sockets
From: Jason Wang @ 2018-09-10 3:47 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20180907121148-mutt-send-email-mst@kernel.org>
On 2018年09月08日 00:13, Michael S. Tsirkin wrote:
> On Fri, Sep 07, 2018 at 03:41:52PM +0800, Jason Wang wrote:
>>>> @@ -556,10 +667,14 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>>>> size_t len, total_len = 0;
>>>> int err;
>>>> int sent_pkts = 0;
>>>> + bool bulking = (sock->sk->sk_sndbuf == INT_MAX);
>>> What does bulking mean?
>> The name is misleading, it means whether we can do batching. For simplicity,
>> I disable batching is sndbuf is not INT_MAX.
> But what does batching have to do with sndbuf?
If we want to do batching with sndbuf, sockets needs to return the
number of packets that was successfully sent. And vhost need to examine
the value.
Consider performance won't be good if sndbuf is limited, I don't
implement this for simplicity.
>
>>>> for (;;) {
>>>> bool busyloop_intr = false;
>>>> + if (nvq->done_idx == VHOST_NET_BATCH)
>>>> + vhost_tx_batch(net, nvq, sock, &msg);
>>>> +
>>>> head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
>>>> &busyloop_intr);
>>>> /* On error, stop handling until the next kick. */
>>>> @@ -577,14 +692,34 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>>>> break;
>>>> }
>>>> - vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
>>>> - vq->heads[nvq->done_idx].len = 0;
>>>> -
>>>> total_len += len;
>>>> - if (tx_can_batch(vq, total_len))
>>>> - msg.msg_flags |= MSG_MORE;
>>>> - else
>>>> - msg.msg_flags &= ~MSG_MORE;
>>>> +
>>>> + /* For simplicity, TX batching is only enabled if
>>>> + * sndbuf is unlimited.
>>> What if sndbuf changes while this processing is going on?
>> We will get the correct sndbuf in the next run of handle_tx(). I think this
>> is safe.
> If it's safe why bother with special-casing INT_MAX?
>
The difference is handle_tx() won't loop forever and will recognize the
new value next time, we have a quota to limit this.
Thanks
^ permalink raw reply
* Re: [PATCH net-next 04/11] tuntap: simplify error handling in tun_build_skb()
From: Jason Wang @ 2018-09-10 3:44 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20180907101645-mutt-send-email-mst@kernel.org>
On 2018年09月07日 22:17, Michael S. Tsirkin wrote:
> On Fri, Sep 07, 2018 at 11:22:00AM +0800, Jason Wang wrote:
>>>> @@ -1668,6 +1668,9 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>>> if (copied != len)
>>>> return ERR_PTR(-EFAULT);
>>>> + get_page(alloc_frag->page);
>>>> + alloc_frag->offset += buflen;
>>>> +
>>> This adds an atomic op on XDP_DROP which is a data path
>>> operation for some workloads.
>> Yes, I have patch on top to amortize this, the idea is to have a very big
>> refcount once after the frag was allocated and maintain a bias and decrease
>> them all when allocating new frags.'
> Why bother with refcounting for a drop though? It should be simple.
>
Right, let me fix this.
Thanks
^ permalink raw reply
* Re: [PATCH net-next 06/11] tuntap: split out XDP logic
From: Jason Wang @ 2018-09-10 3:43 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20180907101606-mutt-send-email-mst@kernel.org>
On 2018年09月07日 22:16, Michael S. Tsirkin wrote:
> On Fri, Sep 07, 2018 at 11:29:34AM +0800, Jason Wang wrote:
>>>> + if (act != XDP_PASS)
>>>> + goto out;
>>> likely?
>> It depends on the XDP program, so I tend not to use it.
> Point is XDP_PASS is already slow.
>
Ok.
Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [virtio-dev] Re: [PATCH net-next v2 0/5] virtio: support packed ring
From: Jason Wang @ 2018-09-10 3:33 UTC (permalink / raw)
To: Tiwei Bie, Michael S. Tsirkin
Cc: virtualization, linux-kernel, netdev, virtio-dev, wexu, jfreimann
In-Reply-To: <20180910030053.GA15645@debian>
On 2018年09月10日 11:00, Tiwei Bie wrote:
> On Fri, Sep 07, 2018 at 09:00:49AM -0400, Michael S. Tsirkin wrote:
>> On Fri, Sep 07, 2018 at 09:22:25AM +0800, Tiwei Bie wrote:
>>> On Mon, Aug 27, 2018 at 05:00:40PM +0300, Michael S. Tsirkin wrote:
>>>> Are there still plans to test the performance with vost pmd?
>>>> vhost doesn't seem to show a performance gain ...
>>>>
>>> I tried some performance tests with vhost PMD. In guest, the
>>> XDP program will return XDP_DROP directly. And in host, testpmd
>>> will do txonly fwd.
>>>
>>> When burst size is 1 and packet size is 64 in testpmd and
>>> testpmd needs to iterate 5 Tx queues (but only the first two
>>> queues are enabled) to prepare and inject packets, I got ~12%
>>> performance boost (5.7Mpps -> 6.4Mpps). And if the vhost PMD
>>> is faster (e.g. just need to iterate the first two queues to
>>> prepare and inject packets), then I got similar performance
>>> for both rings (~9.9Mpps) (packed ring's performance can be
>>> lower, because it's more complicated in driver.)
>>>
>>> I think packed ring makes vhost PMD faster, but it doesn't make
>>> the driver faster. In packed ring, the ring is simplified, and
>>> the handling of the ring in vhost (device) is also simplified,
>>> but things are not simplified in driver, e.g. although there is
>>> no desc table in the virtqueue anymore, driver still needs to
>>> maintain a private desc state table (which is still managed as
>>> a list in this patch set) to support the out-of-order desc
>>> processing in vhost (device).
>>>
>>> I think this patch set is mainly to make the driver have a full
>>> functional support for the packed ring, which makes it possible
>>> to leverage the packed ring feature in vhost (device). But I'm
>>> not sure whether there is any other better idea, I'd like to
>>> hear your thoughts. Thanks!
>> Just this: Jens seems to report a nice gain with virtio and
>> vhost pmd across the board. Try to compare virtio and
>> virtio pmd to see what does pmd do better?
> The virtio PMD (drivers/net/virtio) in DPDK doesn't need to share
> the virtio ring operation code with other drivers and is highly
> optimized for network. E.g. in Rx, the Rx burst function won't
> chain descs. So the ID management for the Rx ring can be quite
> simple and straightforward, we just need to initialize these IDs
> when initializing the ring and don't need to change these IDs
> in data path anymore (the mergable Rx code in that patch set
> assumes the descs will be written back in order, which should be
> fixed. I.e., the ID in the desc should be used to index vq->descx[]).
> The Tx code in that patch set also assumes the descs will be
> written back by device in order, which should be fixed.
Yes it is. I think I've pointed it out in some early version of pmd
patch. So I suspect part (or all) of the boost may come from in order
feature.
>
> But in kernel virtio driver, the virtio_ring.c is very generic.
> The enqueue (virtqueue_add()) and dequeue (virtqueue_get_buf_ctx())
> functions need to support all the virtio devices and should be
> able to handle all the possible cases that may happen. So although
> the packed ring can be very efficient in some cases, currently
> the room to optimize the performance in kernel's virtio_ring.c
> isn't that much. If we want to take the fully advantage of the
> packed ring's efficiency, we need some further e.g. API changes
> in virtio_ring.c, which shouldn't be part of this patch set.
Could you please share more thoughts on this e.g how to improve the API?
Notice since the API is shared by both split ring and packed ring, it
may improve the performance of split ring as well. One can easily
imagine a batching API, but it does not have many real users now, the
only case is the XDP transmission which can accept an array of XDP frames.
> So
> I still think this patch set is mainly to make the kernel virtio
> driver to have a full functional support of the packed ring, and
> we can't expect impressive performance boost with it.
We can only gain when virtio ring layout is the bottleneck. If there're
bottlenecks elsewhere, we probably won't see any increasing in the
numbers. Vhost-net is an example, and lots of optimizations have proved
that virtio ring is not the main bottleneck for the current codes. I
suspect it also the case of virtio driver. Did perf tell us any
interesting things in virtio driver?
Thanks
>
>>
>>>> On Wed, Jul 11, 2018 at 10:27:06AM +0800, Tiwei Bie wrote:
>>>>> Hello everyone,
>>>>>
>>>>> This patch set implements packed ring support in virtio driver.
>>>>>
>>>>> Some functional tests have been done with Jason's
>>>>> packed ring implementation in vhost:
>>>>>
>>>>> https://lkml.org/lkml/2018/7/3/33
>>>>>
>>>>> Both of ping and netperf worked as expected.
>>>>>
>>>>> v1 -> v2:
>>>>> - Use READ_ONCE() to read event off_wrap and flags together (Jason);
>>>>> - Add comments related to ccw (Jason);
>>>>>
>>>>> RFC (v6) -> v1:
>>>>> - Avoid extra virtio_wmb() in virtqueue_enable_cb_delayed_packed()
>>>>> when event idx is off (Jason);
>>>>> - Fix bufs calculation in virtqueue_enable_cb_delayed_packed() (Jason);
>>>>> - Test the state of the desc at used_idx instead of last_used_idx
>>>>> in virtqueue_enable_cb_delayed_packed() (Jason);
>>>>> - Save wrap counter (as part of queue state) in the return value
>>>>> of virtqueue_enable_cb_prepare_packed();
>>>>> - Refine the packed ring definitions in uapi;
>>>>> - Rebase on the net-next tree;
>>>>>
>>>>> RFC v5 -> RFC v6:
>>>>> - Avoid tracking addr/len/flags when DMA API isn't used (MST/Jason);
>>>>> - Define wrap counter as bool (Jason);
>>>>> - Use ALIGN() in vring_init_packed() (Jason);
>>>>> - Avoid using pointer to track `next` in detach_buf_packed() (Jason);
>>>>> - Add comments for barriers (Jason);
>>>>> - Don't enable RING_PACKED on ccw for now (noticed by Jason);
>>>>> - Refine the memory barrier in virtqueue_poll();
>>>>> - Add a missing memory barrier in virtqueue_enable_cb_delayed_packed();
>>>>> - Remove the hacks in virtqueue_enable_cb_prepare_packed();
>>>>>
>>>>> RFC v4 -> RFC v5:
>>>>> - Save DMA addr, etc in desc state (Jason);
>>>>> - Track used wrap counter;
>>>>>
>>>>> RFC v3 -> RFC v4:
>>>>> - Make ID allocation support out-of-order (Jason);
>>>>> - Various fixes for EVENT_IDX support;
>>>>>
>>>>> RFC v2 -> RFC v3:
>>>>> - Split into small patches (Jason);
>>>>> - Add helper virtqueue_use_indirect() (Jason);
>>>>> - Just set id for the last descriptor of a list (Jason);
>>>>> - Calculate the prev in virtqueue_add_packed() (Jason);
>>>>> - Fix/improve desc suppression code (Jason/MST);
>>>>> - Refine the code layout for XXX_split/packed and wrappers (MST);
>>>>> - Fix the comments and API in uapi (MST);
>>>>> - Remove the BUG_ON() for indirect (Jason);
>>>>> - Some other refinements and bug fixes;
>>>>>
>>>>> RFC v1 -> RFC v2:
>>>>> - Add indirect descriptor support - compile test only;
>>>>> - Add event suppression supprt - compile test only;
>>>>> - Move vring_packed_init() out of uapi (Jason, MST);
>>>>> - Merge two loops into one in virtqueue_add_packed() (Jason);
>>>>> - Split vring_unmap_one() for packed ring and split ring (Jason);
>>>>> - Avoid using '%' operator (Jason);
>>>>> - Rename free_head -> next_avail_idx (Jason);
>>>>> - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
>>>>> - Some other refinements and bug fixes;
>>>>>
>>>>> Thanks!
>>>>>
>>>>> Tiwei Bie (5):
>>>>> virtio: add packed ring definitions
>>>>> virtio_ring: support creating packed ring
>>>>> virtio_ring: add packed ring support
>>>>> virtio_ring: add event idx support in packed ring
>>>>> virtio_ring: enable packed ring
>>>>>
>>>>> drivers/s390/virtio/virtio_ccw.c | 14 +
>>>>> drivers/virtio/virtio_ring.c | 1365 ++++++++++++++++++++++------
>>>>> include/linux/virtio_ring.h | 8 +-
>>>>> include/uapi/linux/virtio_config.h | 3 +
>>>>> include/uapi/linux/virtio_ring.h | 43 +
>>>>> 5 files changed, 1157 insertions(+), 276 deletions(-)
>>>>>
>>>>> --
>>>>> 2.18.0
>>>> ---------------------------------------------------------------------
>>>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>>>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>>>
^ permalink raw reply
* [PATCH] ath9k: debug: remove set but not used variable 'fops_dump_nfcal'
From: YueHaibing @ 2018-09-10 3:11 UTC (permalink / raw)
To: ath9k-devel, kvalo
Cc: linux-kernel, netdev, davem, linux-wireless, YueHaibing
'fops_dump_nfcal' is not used since commit 4447d815fd0f ("ath: ath9k: use debugfs_create_devm_seqfile() helper for seq_file entries")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
drivers/net/wireless/ath/ath9k/debug.c | 13 -------------
1 file changed, 13 deletions(-)
diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c
index 0a6eb8a..c871b7e 100644
--- a/drivers/net/wireless/ath/ath9k/debug.c
+++ b/drivers/net/wireless/ath/ath9k/debug.c
@@ -990,19 +990,6 @@ static int read_file_dump_nfcal(struct seq_file *file, void *data)
return 0;
}
-static int open_file_dump_nfcal(struct inode *inode, struct file *f)
-{
- return single_open(f, read_file_dump_nfcal, inode->i_private);
-}
-
-static const struct file_operations fops_dump_nfcal = {
- .read = seq_read,
- .open = open_file_dump_nfcal,
- .owner = THIS_MODULE,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#ifdef CONFIG_ATH9K_BTCOEX_SUPPORT
static ssize_t read_file_btcoex(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
--
2.7.0
^ permalink raw reply related
* Re: [PATCH net-next] net: dsa: Expose tagging protocol to user-space
From: Andrew Lunn @ 2018-09-10 3:04 UTC (permalink / raw)
To: Jiri Pirko
Cc: Florian Fainelli, netdev, Vivien Didelot, David S. Miller,
open list
In-Reply-To: <20180908094331.GB3246@nanopsycho.orion>
On Sat, Sep 08, 2018 at 11:43:31AM +0200, Jiri Pirko wrote:
> Fri, Sep 07, 2018 at 08:09:02PM CEST, f.fainelli@gmail.com wrote:
> >There is no way for user-space to know what a given DSA network device's
> >tagging protocol is. Expose this information through a dsa/tagging
> >attribute which reflects the tagging protocol currently in use.
> >
> >This is helpful for configuration (e.g: none behaves dramatically
> >different wrt. bridges) as well as for packet capture tools when there
> >is not a proper Ethernet type available.
>
>
> Hmm, I wonder. It this something that varies between ports of an
> individual ASIC? Or is it rather something defined per-ASIC. If so, this
> looks more like a devlink-api material.
Hi Jiri
This is between the CPU ethernet device and the switch port that
interface is connect to.
For the Marvell devices, any switch port can be connected to the CPU
Ethernet interface, and the same protocol is used. However, some
switches have a special port which should be used to connect to the
CPU ethernet and supports this tagging protocol. If the designer gets
it wrong and uses a different port to connect the CPU, no tagging
protocol can be used, which as Florian indicated, has a big impact on
bridging, etc.
And just to make it more complex, Marvel has two tagging
schemes. Older devices use DSA, newer devices uses EDSA. However, for
the very new devices in the 6390 family, Marvell made a subtle change
to how EDSA works, which broke it, so we had to go back to DSA.
Of the different tagging protocols used by the 50 or so switches Linux
supports, only the EDSA tagging protocol makes use of an
Ethertype. tcpdump knows how to decode these packets. For all the
other tagging protocols, it has no idea, the Ethertype is all messed
up, and it just prints hex. What i think Florian wants to do is stuff
the tagging protocol into the pcap-ng header so that tcpdump knows
what protocol is in use, and can put the correct protocol dissector in
the chain.
And just for completeness, there potentially is a second tagging
scheme when you have multiple switches connected together in a
cluster, but that is internal to the cluster. The CPU is unaware of
it. But if you are snooping on the traffic, you need to know what the
protocol is, so you can decode the frames. In theory, that could be
EDSA or DSA, and can be selected per intra-switch link. In practice,
they are all DSA, and i've not heard of anybody actually snooping this
traffic.
Andrew
^ permalink raw reply
* Re: Re: [PATCH net-next v2 0/5] virtio: support packed ring
From: Tiwei Bie @ 2018-09-10 3:00 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: jasowang, virtualization, linux-kernel, netdev, virtio-dev, wexu,
jfreimann
In-Reply-To: <20180907084509-mutt-send-email-mst@kernel.org>
On Fri, Sep 07, 2018 at 09:00:49AM -0400, Michael S. Tsirkin wrote:
> On Fri, Sep 07, 2018 at 09:22:25AM +0800, Tiwei Bie wrote:
> > On Mon, Aug 27, 2018 at 05:00:40PM +0300, Michael S. Tsirkin wrote:
> > > Are there still plans to test the performance with vost pmd?
> > > vhost doesn't seem to show a performance gain ...
> > >
> >
> > I tried some performance tests with vhost PMD. In guest, the
> > XDP program will return XDP_DROP directly. And in host, testpmd
> > will do txonly fwd.
> >
> > When burst size is 1 and packet size is 64 in testpmd and
> > testpmd needs to iterate 5 Tx queues (but only the first two
> > queues are enabled) to prepare and inject packets, I got ~12%
> > performance boost (5.7Mpps -> 6.4Mpps). And if the vhost PMD
> > is faster (e.g. just need to iterate the first two queues to
> > prepare and inject packets), then I got similar performance
> > for both rings (~9.9Mpps) (packed ring's performance can be
> > lower, because it's more complicated in driver.)
> >
> > I think packed ring makes vhost PMD faster, but it doesn't make
> > the driver faster. In packed ring, the ring is simplified, and
> > the handling of the ring in vhost (device) is also simplified,
> > but things are not simplified in driver, e.g. although there is
> > no desc table in the virtqueue anymore, driver still needs to
> > maintain a private desc state table (which is still managed as
> > a list in this patch set) to support the out-of-order desc
> > processing in vhost (device).
> >
> > I think this patch set is mainly to make the driver have a full
> > functional support for the packed ring, which makes it possible
> > to leverage the packed ring feature in vhost (device). But I'm
> > not sure whether there is any other better idea, I'd like to
> > hear your thoughts. Thanks!
>
> Just this: Jens seems to report a nice gain with virtio and
> vhost pmd across the board. Try to compare virtio and
> virtio pmd to see what does pmd do better?
The virtio PMD (drivers/net/virtio) in DPDK doesn't need to share
the virtio ring operation code with other drivers and is highly
optimized for network. E.g. in Rx, the Rx burst function won't
chain descs. So the ID management for the Rx ring can be quite
simple and straightforward, we just need to initialize these IDs
when initializing the ring and don't need to change these IDs
in data path anymore (the mergable Rx code in that patch set
assumes the descs will be written back in order, which should be
fixed. I.e., the ID in the desc should be used to index vq->descx[]).
The Tx code in that patch set also assumes the descs will be
written back by device in order, which should be fixed.
But in kernel virtio driver, the virtio_ring.c is very generic.
The enqueue (virtqueue_add()) and dequeue (virtqueue_get_buf_ctx())
functions need to support all the virtio devices and should be
able to handle all the possible cases that may happen. So although
the packed ring can be very efficient in some cases, currently
the room to optimize the performance in kernel's virtio_ring.c
isn't that much. If we want to take the fully advantage of the
packed ring's efficiency, we need some further e.g. API changes
in virtio_ring.c, which shouldn't be part of this patch set. So
I still think this patch set is mainly to make the kernel virtio
driver to have a full functional support of the packed ring, and
we can't expect impressive performance boost with it.
>
>
> >
> > >
> > > On Wed, Jul 11, 2018 at 10:27:06AM +0800, Tiwei Bie wrote:
> > > > Hello everyone,
> > > >
> > > > This patch set implements packed ring support in virtio driver.
> > > >
> > > > Some functional tests have been done with Jason's
> > > > packed ring implementation in vhost:
> > > >
> > > > https://lkml.org/lkml/2018/7/3/33
> > > >
> > > > Both of ping and netperf worked as expected.
> > > >
> > > > v1 -> v2:
> > > > - Use READ_ONCE() to read event off_wrap and flags together (Jason);
> > > > - Add comments related to ccw (Jason);
> > > >
> > > > RFC (v6) -> v1:
> > > > - Avoid extra virtio_wmb() in virtqueue_enable_cb_delayed_packed()
> > > > when event idx is off (Jason);
> > > > - Fix bufs calculation in virtqueue_enable_cb_delayed_packed() (Jason);
> > > > - Test the state of the desc at used_idx instead of last_used_idx
> > > > in virtqueue_enable_cb_delayed_packed() (Jason);
> > > > - Save wrap counter (as part of queue state) in the return value
> > > > of virtqueue_enable_cb_prepare_packed();
> > > > - Refine the packed ring definitions in uapi;
> > > > - Rebase on the net-next tree;
> > > >
> > > > RFC v5 -> RFC v6:
> > > > - Avoid tracking addr/len/flags when DMA API isn't used (MST/Jason);
> > > > - Define wrap counter as bool (Jason);
> > > > - Use ALIGN() in vring_init_packed() (Jason);
> > > > - Avoid using pointer to track `next` in detach_buf_packed() (Jason);
> > > > - Add comments for barriers (Jason);
> > > > - Don't enable RING_PACKED on ccw for now (noticed by Jason);
> > > > - Refine the memory barrier in virtqueue_poll();
> > > > - Add a missing memory barrier in virtqueue_enable_cb_delayed_packed();
> > > > - Remove the hacks in virtqueue_enable_cb_prepare_packed();
> > > >
> > > > RFC v4 -> RFC v5:
> > > > - Save DMA addr, etc in desc state (Jason);
> > > > - Track used wrap counter;
> > > >
> > > > RFC v3 -> RFC v4:
> > > > - Make ID allocation support out-of-order (Jason);
> > > > - Various fixes for EVENT_IDX support;
> > > >
> > > > RFC v2 -> RFC v3:
> > > > - Split into small patches (Jason);
> > > > - Add helper virtqueue_use_indirect() (Jason);
> > > > - Just set id for the last descriptor of a list (Jason);
> > > > - Calculate the prev in virtqueue_add_packed() (Jason);
> > > > - Fix/improve desc suppression code (Jason/MST);
> > > > - Refine the code layout for XXX_split/packed and wrappers (MST);
> > > > - Fix the comments and API in uapi (MST);
> > > > - Remove the BUG_ON() for indirect (Jason);
> > > > - Some other refinements and bug fixes;
> > > >
> > > > RFC v1 -> RFC v2:
> > > > - Add indirect descriptor support - compile test only;
> > > > - Add event suppression supprt - compile test only;
> > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > > > - Avoid using '%' operator (Jason);
> > > > - Rename free_head -> next_avail_idx (Jason);
> > > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > > > - Some other refinements and bug fixes;
> > > >
> > > > Thanks!
> > > >
> > > > Tiwei Bie (5):
> > > > virtio: add packed ring definitions
> > > > virtio_ring: support creating packed ring
> > > > virtio_ring: add packed ring support
> > > > virtio_ring: add event idx support in packed ring
> > > > virtio_ring: enable packed ring
> > > >
> > > > drivers/s390/virtio/virtio_ccw.c | 14 +
> > > > drivers/virtio/virtio_ring.c | 1365 ++++++++++++++++++++++------
> > > > include/linux/virtio_ring.h | 8 +-
> > > > include/uapi/linux/virtio_config.h | 3 +
> > > > include/uapi/linux/virtio_ring.h | 43 +
> > > > 5 files changed, 1157 insertions(+), 276 deletions(-)
> > > >
> > > > --
> > > > 2.18.0
> > >
> > > ---------------------------------------------------------------------
> > > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> > >
^ permalink raw reply
* Re: [PATCH net V2] ip: frags: fix crash in ip_do_fragment()
From: David Miller @ 2018-09-09 21:51 UTC (permalink / raw)
To: edumazet; +Cc: ap420073, posk, netdev, pablo, fw
In-Reply-To: <CANn89iKXTr+zy7ms5v7RNaz0BvQ1DMt+A7OfyPkypJZ9BvzeDg@mail.gmail.com>
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 9 Sep 2018 13:51:21 -0700
>> Hi Eric,
>> Thank you for review!
>>
>> I think netfilter side ipv6 code change is needed
>> because netfilter ipv6 defrag routine also set fp->ip_defrag_offset value
>> so that fp->sk will not be NULL.
>> And I think these crash in ip_do_fragment() and ip6_fragment() are
>> actually same bug.
>>
>
> Right you are, thanks.
>
> Reviewed-by: Eric Dumazet <edumazet@google.com>
Applied, thanks everyone.
^ permalink raw reply
* Re: Re: [PATCH net-next v2 4/5] virtio_ring: add event idx support in packed ring
From: Tiwei Bie @ 2018-09-10 2:35 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: jasowang, virtualization, linux-kernel, netdev, virtio-dev, wexu,
jfreimann
In-Reply-To: <20180907100341-mutt-send-email-mst@kernel.org>
On Fri, Sep 07, 2018 at 10:10:14AM -0400, Michael S. Tsirkin wrote:
> On Wed, Jul 11, 2018 at 10:27:10AM +0800, Tiwei Bie wrote:
> > This commit introduces the EVENT_IDX support in packed ring.
> >
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>
> Besides the usual comment about hard-coded constants like <<15:
> does this actually do any good for performance? We don't
> have to if we do not want to.
Got it. Thanks.
>
> > ---
> > drivers/virtio/virtio_ring.c | 73 ++++++++++++++++++++++++++++++++----
> > 1 file changed, 65 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index f317b485ba54..f79a1e17f7d1 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -1050,7 +1050,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > - u16 flags;
> > + u16 new, old, off_wrap, flags, wrap_counter, event_idx;
> > bool needs_kick;
> > u32 snapshot;
> >
> > @@ -1059,9 +1059,19 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > * suppressions. */
> > virtio_mb(vq->weak_barriers);
> >
> > + old = vq->next_avail_idx - vq->num_added;
> > + new = vq->next_avail_idx;
> > + vq->num_added = 0;
> > +
> > snapshot = READ_ONCE(*(u32 *)vq->vring_packed.device);
> > + off_wrap = virtio16_to_cpu(_vq->vdev, (__virtio16)(snapshot & 0xffff));
> > flags = virtio16_to_cpu(_vq->vdev, (__virtio16)(snapshot >> 16)) & 0x3;
>
>
> some kind of struct union would be helpful to make this readable.
I will define a struct/union for this.
>
> >
> > + wrap_counter = off_wrap >> 15;
> > + event_idx = off_wrap & ~(1 << 15);
> > + if (wrap_counter != vq->avail_wrap_counter)
> > + event_idx -= vq->vring_packed.num;
> > +
> > #ifdef DEBUG
> > if (vq->last_add_time_valid) {
> > WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
> > @@ -1070,7 +1080,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > vq->last_add_time_valid = false;
> > #endif
> >
> > - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > + if (flags == VRING_EVENT_F_DESC)
> > + needs_kick = vring_need_event(event_idx, new, old);
> > + else
> > + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > END_USE(vq);
> > return needs_kick;
> > }
> > @@ -1185,6 +1198,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > ret = vq->desc_state_packed[id].data;
> > detach_buf_packed(vq, id, ctx);
> >
> > + /* If we expect an interrupt for the next entry, tell host
> > + * by writing event index and flush out the write before
> > + * the read in the next get_buf call. */
> > + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> > + virtio_store_mb(vq->weak_barriers,
> > + &vq->vring_packed.driver->off_wrap,
> > + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> > + ((u16)vq->used_wrap_counter << 15)));
> > +
> > #ifdef DEBUG
> > vq->last_add_time_valid = false;
> > #endif
> > @@ -1213,8 +1235,18 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > /* We optimistically turn back on interrupts, then check if there was
> > * more to do. */
> >
> > + if (vq->event) {
> > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > + vq->last_used_idx |
> > + ((u16)vq->used_wrap_counter << 15));
> > + /* We need to update event offset and event wrap
> > + * counter first before updating event flags. */
> > + virtio_wmb(vq->weak_barriers);
> > + }
> > +
> > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > + VRING_EVENT_F_ENABLE;
> > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > vq->event_flags_shadow);
> > }
> > @@ -1238,22 +1270,47 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned off_wrap)
> > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > + u16 bufs, used_idx, wrap_counter;
> >
> > START_USE(vq);
> >
> > /* We optimistically turn back on interrupts, then check if there was
> > * more to do. */
> >
> > + if (vq->event) {
> > + /* TODO: tune this threshold */
> > + bufs = (vq->vring_packed.num - _vq->num_free) * 3 / 4;
> > + wrap_counter = vq->used_wrap_counter;
> > +
> > + used_idx = vq->last_used_idx + bufs;
> > + if (used_idx >= vq->vring_packed.num) {
> > + used_idx -= vq->vring_packed.num;
> > + wrap_counter ^= 1;
> > + }
> > +
> > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > + used_idx | (wrap_counter << 15));
> > +
> > + /* We need to update event offset and event wrap
> > + * counter first before updating event flags. */
> > + virtio_wmb(vq->weak_barriers);
> > + } else {
> > + used_idx = vq->last_used_idx;
> > + wrap_counter = vq->used_wrap_counter;
> > + }
> > +
> > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > + VRING_EVENT_F_ENABLE;
> > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > vq->event_flags_shadow);
> > - /* We need to enable interrupts first before re-checking
> > - * for more used buffers. */
> > - virtio_mb(vq->weak_barriers);
> > }
> >
> > - if (more_used_packed(vq)) {
> > + /* We need to update event suppression structure first
> > + * before re-checking for more used buffers. */
> > + virtio_mb(vq->weak_barriers);
> > +
>
> mb is expensive. We should not do it if we changed nothing.
I will try to avoid it when possible.
>
> > + if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
> > END_USE(vq);
> > return false;
> > }
> > --
> > 2.18.0
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>
^ permalink raw reply
* Re: [PATCH net-next v2 2/5] virtio_ring: support creating packed ring
From: Tiwei Bie @ 2018-09-10 2:28 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: jasowang, virtualization, linux-kernel, netdev, virtio-dev, wexu,
jfreimann
In-Reply-To: <20180907095208-mutt-send-email-mst@kernel.org>
On Fri, Sep 07, 2018 at 10:03:24AM -0400, Michael S. Tsirkin wrote:
> On Wed, Jul 11, 2018 at 10:27:08AM +0800, Tiwei Bie wrote:
> > This commit introduces the support for creating packed ring.
> > All split ring specific functions are added _split suffix.
> > Some necessary stubs for packed ring are also added.
> >
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>
> I'd rather have a patch just renaming split functions, then
> add all packed stuff in as a separate patch on top.
Sure, I will do that.
>
>
> > ---
> > drivers/virtio/virtio_ring.c | 801 +++++++++++++++++++++++------------
> > include/linux/virtio_ring.h | 8 +-
> > 2 files changed, 546 insertions(+), 263 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 814b395007b2..c4f8abc7445a 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -60,11 +60,15 @@ struct vring_desc_state {
> > struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
> > };
> >
> > +struct vring_desc_state_packed {
> > + int next; /* The next desc state. */
>
> So this can go away with IN_ORDER?
Yes. If IN_ORDER is negotiated, next won't be needed anymore.
Currently, IN_ORDER isn't included in this patch set, because
some changes for split ring are needed to make sure that it
will use the descs in the expected order. After that,
optimizations can be done for both of split ring and packed
ring respectively.
>
> > +};
> > +
> > struct vring_virtqueue {
> > struct virtqueue vq;
> >
> > - /* Actual memory layout for this queue */
> > - struct vring vring;
> > + /* Is this a packed ring? */
> > + bool packed;
> >
> > /* Can we use weak barriers? */
> > bool weak_barriers;
> > @@ -86,11 +90,39 @@ struct vring_virtqueue {
> > /* Last used index we've seen. */
> > u16 last_used_idx;
> >
> > - /* Last written value to avail->flags */
> > - u16 avail_flags_shadow;
> > + union {
> > + /* Available for split ring */
> > + struct {
> > + /* Actual memory layout for this queue. */
> > + struct vring vring;
> >
> > - /* Last written value to avail->idx in guest byte order */
> > - u16 avail_idx_shadow;
> > + /* Last written value to avail->flags */
> > + u16 avail_flags_shadow;
> > +
> > + /* Last written value to avail->idx in
> > + * guest byte order. */
> > + u16 avail_idx_shadow;
> > + };
>
> Name this field split so it's easier to detect misuse of e.g.
> packed fields in split code?
Good point, I'll do that.
>
> > +
> > + /* Available for packed ring */
> > + struct {
> > + /* Actual memory layout for this queue. */
> > + struct vring_packed vring_packed;
> > +
> > + /* Driver ring wrap counter. */
> > + bool avail_wrap_counter;
> > +
> > + /* Device ring wrap counter. */
> > + bool used_wrap_counter;
> > +
> > + /* Index of the next avail descriptor. */
> > + u16 next_avail_idx;
> > +
> > + /* Last written value to driver->flags in
> > + * guest byte order. */
> > + u16 event_flags_shadow;
> > + };
> > + };
[...]
> > +
> > +/*
> > + * The layout for the packed ring is a continuous chunk of memory
> > + * which looks like this.
> > + *
> > + * struct vring_packed {
> > + * // The actual descriptors (16 bytes each)
> > + * struct vring_packed_desc desc[num];
> > + *
> > + * // Padding to the next align boundary.
> > + * char pad[];
> > + *
> > + * // Driver Event Suppression
> > + * struct vring_packed_desc_event driver;
> > + *
> > + * // Device Event Suppression
> > + * struct vring_packed_desc_event device;
> > + * };
> > + */
>
> Why not just allocate event structures separately?
> Is it a win to have them share a cache line for some reason?
Will do that.
>
> > +static inline void vring_init_packed(struct vring_packed *vr, unsigned int num,
> > + void *p, unsigned long align)
> > +{
> > + vr->num = num;
> > + vr->desc = p;
> > + vr->driver = (void *)ALIGN(((uintptr_t)p +
> > + sizeof(struct vring_packed_desc) * num), align);
> > + vr->device = vr->driver + 1;
> > +}
>
> What's all this about alignment? Where does it come from?
It comes from the `vring_align` parameter of vring_create_virtqueue()
and vring_new_virtqueue():
https://github.com/torvalds/linux/blob/a49a9dcce802/drivers/virtio/virtio_ring.c#L1061
https://github.com/torvalds/linux/blob/a49a9dcce802/drivers/virtio/virtio_ring.c#L1123
Should I just ignore it in packed ring?
CCW defined this:
#define KVM_VIRTIO_CCW_RING_ALIGN 4096
I'm not familiar with CCW. Currently, in this patch set, packed ring
isn't enabled on CCW dues to some legacy accessors are not implemented
in packed ring yet.
>
> > +
> > +static inline unsigned vring_size_packed(unsigned int num, unsigned long align)
> > +{
> > + return ((sizeof(struct vring_packed_desc) * num + align - 1)
> > + & ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
> > +}
[...]
> > @@ -1129,10 +1388,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
> > void (*callback)(struct virtqueue *vq),
> > const char *name)
> > {
> > - struct vring vring;
> > - vring_init(&vring, num, pages, vring_align);
> > - return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
> > - notify, callback, name);
> > + union vring_union vring;
> > + bool packed;
> > +
> > + packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
> > + if (packed)
> > + vring_init_packed(&vring.vring_packed, num, pages, vring_align);
> > + else
> > + vring_init(&vring.vring_split, num, pages, vring_align);
>
>
> vring_init in the UAPI header is more or less a bug.
> I'd just stop using it, keep it around for legacy userspace.
Got it. I'd like to do that. Thanks.
>
> > +
> > + return __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
> > + context, notify, callback, name);
> > }
> > EXPORT_SYMBOL_GPL(vring_new_virtqueue);
> >
> > @@ -1142,7 +1408,9 @@ void vring_del_virtqueue(struct virtqueue *_vq)
> >
> > if (vq->we_own_ring) {
> > vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
> > - vq->vring.desc, vq->queue_dma_addr);
> > + vq->packed ? (void *)vq->vring_packed.desc :
> > + (void *)vq->vring.desc,
> > + vq->queue_dma_addr);
> > }
> > list_del(&_vq->list);
> > kfree(vq);
> > @@ -1184,7 +1452,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
> >
> > struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > - return vq->vring.num;
> > + return vq->packed ? vq->vring_packed.num : vq->vring.num;
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
> >
> > @@ -1227,6 +1495,10 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
> >
> > BUG_ON(!vq->we_own_ring);
> >
> > + if (vq->packed)
> > + return vq->queue_dma_addr + ((char *)vq->vring_packed.driver -
> > + (char *)vq->vring_packed.desc);
> > +
> > return vq->queue_dma_addr +
> > ((char *)vq->vring.avail - (char *)vq->vring.desc);
> > }
> > @@ -1238,11 +1510,16 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
> >
> > BUG_ON(!vq->we_own_ring);
> >
> > + if (vq->packed)
> > + return vq->queue_dma_addr + ((char *)vq->vring_packed.device -
> > + (char *)vq->vring_packed.desc);
> > +
> > return vq->queue_dma_addr +
> > ((char *)vq->vring.used - (char *)vq->vring.desc);
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
> >
> > +/* Only available for split ring */
> > const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > {
> > return &to_vvq(vq)->vring;
> > diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
> > index fab02133a919..992142b35f55 100644
> > --- a/include/linux/virtio_ring.h
> > +++ b/include/linux/virtio_ring.h
> > @@ -60,6 +60,11 @@ static inline void virtio_store_mb(bool weak_barriers,
> > struct virtio_device;
> > struct virtqueue;
> >
> > +union vring_union {
> > + struct vring vring_split;
> > + struct vring_packed vring_packed;
> > +};
> > +
> > /*
> > * Creates a virtqueue and allocates the descriptor ring. If
> > * may_reduce_num is set, then this may allocate a smaller ring than
> > @@ -79,7 +84,8 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
> >
> > /* Creates a virtqueue with a custom layout. */
> > struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > - struct vring vring,
> > + union vring_union vring,
> > + bool packed,
> > struct virtio_device *vdev,
> > bool weak_barriers,
> > bool ctx,
> > --
> > 2.18.0
^ permalink raw reply
* Re: [PATCH net-next v2 1/5] virtio: add packed ring definitions
From: Tiwei Bie @ 2018-09-10 2:13 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: jasowang, virtualization, linux-kernel, netdev, virtio-dev, wexu,
jfreimann
In-Reply-To: <20180907094922-mutt-send-email-mst@kernel.org>
On Fri, Sep 07, 2018 at 09:51:23AM -0400, Michael S. Tsirkin wrote:
> On Wed, Jul 11, 2018 at 10:27:07AM +0800, Tiwei Bie wrote:
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > ---
> > include/uapi/linux/virtio_config.h | 3 +++
> > include/uapi/linux/virtio_ring.h | 43 ++++++++++++++++++++++++++++++
> > 2 files changed, 46 insertions(+)
> >
> > diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
> > index 449132c76b1c..1196e1c1d4f6 100644
> > --- a/include/uapi/linux/virtio_config.h
> > +++ b/include/uapi/linux/virtio_config.h
> > @@ -75,6 +75,9 @@
> > */
> > #define VIRTIO_F_IOMMU_PLATFORM 33
> >
> > +/* This feature indicates support for the packed virtqueue layout. */
> > +#define VIRTIO_F_RING_PACKED 34
> > +
> > /*
> > * Does the device support Single Root I/O Virtualization?
> > */
> > diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
> > index 6d5d5faa989b..0254a2ba29cf 100644
> > --- a/include/uapi/linux/virtio_ring.h
> > +++ b/include/uapi/linux/virtio_ring.h
> > @@ -44,6 +44,10 @@
> > /* This means the buffer contains a list of buffer descriptors. */
> > #define VRING_DESC_F_INDIRECT 4
> >
> > +/* Mark a descriptor as available or used. */
> > +#define VRING_DESC_F_AVAIL (1ul << 7)
> > +#define VRING_DESC_F_USED (1ul << 15)
> > +
> > /* The Host uses this in used->flags to advise the Guest: don't kick me when
> > * you add a buffer. It's unreliable, so it's simply an optimization. Guest
> > * will still kick if it's out of buffers. */
> > @@ -53,6 +57,17 @@
> > * optimization. */
> > #define VRING_AVAIL_F_NO_INTERRUPT 1
> >
> > +/* Enable events. */
> > +#define VRING_EVENT_F_ENABLE 0x0
> > +/* Disable events. */
> > +#define VRING_EVENT_F_DISABLE 0x1
> > +/*
> > + * Enable events for a specific descriptor
> > + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
> > + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
> > + */
> > +#define VRING_EVENT_F_DESC 0x2
> > +
> > /* We support indirect buffer descriptors */
> > #define VIRTIO_RING_F_INDIRECT_DESC 28
> >
>
> These are for the packed ring, right? Pls prefix accordingly.
How about something like this:
#define VRING_PACKED_DESC_F_AVAIL (1u << 7)
#define VRING_PACKED_DESC_F_USED (1u << 15)
#define VRING_PACKED_EVENT_F_ENABLE 0x0
#define VRING_PACKED_EVENT_F_DISABLE 0x1
#define VRING_PACKED_EVENT_F_DESC 0x2
> Also, you likely need macros for the wrap counters.
How about something like this:
#define VRING_PACKED_EVENT_WRAP_COUNTER_SHIFT 15
#define VRING_PACKED_EVENT_WRAP_COUNTER_MASK \
(1u << VRING_PACKED_WRAP_COUNTER_SHIFT)
#define VRING_PACKED_EVENT_OFFSET_MASK \
((1u << VRING_PACKED_WRAP_COUNTER_SHIFT) - 1)
>
> > @@ -171,4 +186,32 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
> > return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
> > }
> >
> > +struct vring_packed_desc_event {
> > + /* Descriptor Ring Change Event Offset/Wrap Counter. */
> > + __virtio16 off_wrap;
> > + /* Descriptor Ring Change Event Flags. */
> > + __virtio16 flags;
> > +};
> > +
> > +struct vring_packed_desc {
> > + /* Buffer Address. */
> > + __virtio64 addr;
> > + /* Buffer Length. */
> > + __virtio32 len;
> > + /* Buffer ID. */
> > + __virtio16 id;
> > + /* The flags depending on descriptor type. */
> > + __virtio16 flags;
> > +};
>
> Don't use __virtioXX types, just __leXX ones.
Got it, will do that.
>
> > +
> > +struct vring_packed {
> > + unsigned int num;
> > +
> > + struct vring_packed_desc *desc;
> > +
> > + struct vring_packed_desc_event *driver;
> > +
> > + struct vring_packed_desc_event *device;
> > +};
> > +
> > #endif /* _UAPI_LINUX_VIRTIO_RING_H */
> > --
> > 2.18.0
^ permalink raw reply
* Re: [PATCH net-next] ieee802154: ca8210: Use kmemdup instead of duplicating it in ca8210_test_int_driver_write
From: YueHaibing @ 2018-09-10 2:08 UTC (permalink / raw)
To: davem, h.morris, alex.aring, stefan; +Cc: linux-kernel, netdev, linux-wpan
In-Reply-To: <20180906034228.7660-1-yuehaibing@huawei.com>
sorry, Pls ignore this duplicated patch.
On 2018/9/6 11:42, YueHaibing wrote:
> Replace calls to kmalloc followed by a memcpy with a direct call tokmemdup.
>
> Patch found using coccinelle.
>
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
> ---
> drivers/net/ieee802154/ca8210.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
> index 58299fb..e21279d 100644
> --- a/drivers/net/ieee802154/ca8210.c
> +++ b/drivers/net/ieee802154/ca8210.c
> @@ -634,10 +634,9 @@ static int ca8210_test_int_driver_write(
> for (i = 0; i < len; i++)
> dev_dbg(&priv->spi->dev, "%#03x\n", buf[i]);
>
> - fifo_buffer = kmalloc(len, GFP_KERNEL);
> + fifo_buffer = kmemdup(buf, len, GFP_KERNEL);
> if (!fifo_buffer)
> return -ENOMEM;
> - memcpy(fifo_buffer, buf, len);
> kfifo_in(&test->up_fifo, &fifo_buffer, 4);
> wake_up_interruptible(&priv->test.readq);
>
>
^ permalink raw reply
* Re: [PATCH net-next v2 3/5] virtio_ring: add packed ring support
From: Tiwei Bie @ 2018-09-10 2:03 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: virtio-dev, netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180907083500-mutt-send-email-mst@kernel.org>
On Fri, Sep 07, 2018 at 09:49:14AM -0400, Michael S. Tsirkin wrote:
> On Wed, Jul 11, 2018 at 10:27:09AM +0800, Tiwei Bie wrote:
> > This commit introduces the support (without EVENT_IDX) for
> > packed ring.
> >
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > ---
> > drivers/virtio/virtio_ring.c | 495 ++++++++++++++++++++++++++++++++++-
> > 1 file changed, 487 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index c4f8abc7445a..f317b485ba54 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -55,12 +55,21 @@
> > #define END_USE(vq)
> > #endif
> >
> > +#define _VRING_DESC_F_AVAIL(b) ((__u16)(b) << 7)
> > +#define _VRING_DESC_F_USED(b) ((__u16)(b) << 15)
>
> Underscore followed by an upper case letter isn't a good idea
> for a symbol. And it's not nice that this does not
> use the VRING_DESC_F_USED from the header.
> How about ((b) ? VRING_DESC_F_USED : 0) instead?
> Is produced code worse then?
Yes, I think the produced code is worse when we use
conditional expression. Below is a simple test:
#define foo1(b) ((b) << 10)
#define foo2(b) ((b) ? (1 << 10) : 0)
unsigned short bar(unsigned short b)
{
return foo1(b);
}
unsigned short baz(unsigned short b)
{
return foo2(b);
}
With `gcc -O3 -S`, I got below assembly code:
.file "tmp.c"
.text
.p2align 4,,15
.globl bar
.type bar, @function
bar:
.LFB0:
.cfi_startproc
movl %edi, %eax
sall $10, %eax
ret
.cfi_endproc
.LFE0:
.size bar, .-bar
.p2align 4,,15
.globl baz
.type baz, @function
baz:
.LFB1:
.cfi_startproc
xorl %eax, %eax
testw %di, %di
setne %al
sall $10, %eax
ret
.cfi_endproc
.LFE1:
.size baz, .-baz
.ident "GCC: (Debian 8.2.0-4) 8.2.0"
.section .note.GNU-stack,"",@progbits
That is to say, for `((b) << 10)`, it will shift the
register directly. But for `((b) ? (1 << 10) : 0)`,
in above code, it will zero eax first, and set al
to 1 depend on whether di is 0, and shift eax.
>
> > +
> > struct vring_desc_state {
> > void *data; /* Data for callback. */
> > struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
> > };
> >
> > struct vring_desc_state_packed {
> > + void *data; /* Data for callback. */
> > + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
>
> Include vring_desc_state for these?
Sure.
>
> > + int num; /* Descriptor list length. */
> > + dma_addr_t addr; /* Buffer DMA addr. */
> > + u32 len; /* Buffer length. */
> > + u16 flags; /* Descriptor flags. */
>
> This seems only to be used for indirect. Check indirect field above
> instead and drop this.
The `flags` is also needed to know the direction, i.e. DMA_FROM_DEVICE
or DMA_TO_DEVICE when do DMA unmap.
>
> > int next; /* The next desc state. */
>
> Packing things into 16 bit integers will help reduce
> cache pressure here.
>
> Also, this is only used for unmap, so when dma API is not used,
> maintaining addr and len list management is unnecessary. How about we
> maintain addr/len in a separate array, avoiding accesses on unmap in that case?
Sure. I'll give it a try.
>
> Also, lots of architectures have a nop unmap in the DMA API.
> See a proposed patch at the end for optimizing that case.
Got it. Thanks.
>
> > };
> >
> > @@ -660,7 +669,6 @@ static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > - virtio_mb(vq->weak_barriers);
> > return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
> > }
>
> why is this changing the split queue implementation?
Because above barrier is also needed by virtqueue_poll_packed(),
so I moved it to virtqueue_poll() and also added some comments
for it.
>
>
> >
> > @@ -757,6 +765,72 @@ static inline unsigned vring_size_packed(unsigned int num, unsigned long align)
> > & ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
> > }
> >
> > +static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
> > + struct vring_desc_state_packed *state)
> > +{
> > + u16 flags;
> > +
> > + if (!vring_use_dma_api(vq->vq.vdev))
> > + return;
> > +
> > + flags = state->flags;
> > +
> > + if (flags & VRING_DESC_F_INDIRECT) {
> > + dma_unmap_single(vring_dma_dev(vq),
> > + state->addr, state->len,
> > + (flags & VRING_DESC_F_WRITE) ?
> > + DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > + } else {
> > + dma_unmap_page(vring_dma_dev(vq),
> > + state->addr, state->len,
> > + (flags & VRING_DESC_F_WRITE) ?
> > + DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > + }
> > +}
> > +
> > +static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
> > + struct vring_packed_desc *desc)
> > +{
> > + u16 flags;
> > +
> > + if (!vring_use_dma_api(vq->vq.vdev))
> > + return;
> > +
> > + flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
>
> I see no reason to use virtioXX wrappers for the packed ring.
> That's there to support legacy devices. Just use leXX.
Okay.
>
> > +
> > + if (flags & VRING_DESC_F_INDIRECT) {
> > + dma_unmap_single(vring_dma_dev(vq),
> > + virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > + virtio32_to_cpu(vq->vq.vdev, desc->len),
> > + (flags & VRING_DESC_F_WRITE) ?
> > + DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > + } else {
> > + dma_unmap_page(vring_dma_dev(vq),
> > + virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > + virtio32_to_cpu(vq->vq.vdev, desc->len),
> > + (flags & VRING_DESC_F_WRITE) ?
> > + DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > + }
> > +}
> > +
> > +static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
> > + unsigned int total_sg,
> > + gfp_t gfp)
> > +{
> > + struct vring_packed_desc *desc;
> > +
> > + /*
> > + * We require lowmem mappings for the descriptors because
> > + * otherwise virt_to_phys will give us bogus addresses in the
> > + * virtqueue.
>
> Where is virt_to_phys used? I don't see it in this patch.
In vring_map_single(), virt_to_phys() will be used to translate
the address to phys if dma api isn't used:
https://github.com/torvalds/linux/blob/a49a9dcce802/drivers/virtio/virtio_ring.c#L197
>
> > + */
> > + gfp &= ~__GFP_HIGHMEM;
> > +
> > + desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
> > +
> > + return desc;
> > +}
> > +
> > static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > struct scatterlist *sgs[],
> > unsigned int total_sg,
> > @@ -766,47 +840,449 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > void *ctx,
> > gfp_t gfp)
> > {
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > + struct vring_packed_desc *desc;
> > + struct scatterlist *sg;
> > + unsigned int i, n, descs_used, uninitialized_var(prev), err_idx;
> > + __virtio16 uninitialized_var(head_flags), flags;
> > + u16 head, avail_wrap_counter, id, curr;
> > + bool indirect;
> > +
> > + START_USE(vq);
> > +
> > + BUG_ON(data == NULL);
> > + BUG_ON(ctx && vq->indirect);
> > +
> > + if (unlikely(vq->broken)) {
> > + END_USE(vq);
> > + return -EIO;
> > + }
> > +
> > +#ifdef DEBUG
> > + {
> > + ktime_t now = ktime_get();
> > +
> > + /* No kick or get, with .1 second between? Warn. */
> > + if (vq->last_add_time_valid)
> > + WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
> > + > 100);
> > + vq->last_add_time = now;
> > + vq->last_add_time_valid = true;
> > + }
> > +#endif
>
> Add incline helpers for this debug stuff rather than
> duplicate it from split ring?
Sure. I'd like to do that.
>
>
> > +
> > + BUG_ON(total_sg == 0);
> > +
> > + head = vq->next_avail_idx;
> > + avail_wrap_counter = vq->avail_wrap_counter;
> > +
> > + if (virtqueue_use_indirect(_vq, total_sg))
> > + desc = alloc_indirect_packed(_vq, total_sg, gfp);
> > + else {
> > + desc = NULL;
> > + WARN_ON_ONCE(total_sg > vq->vring_packed.num && !vq->indirect);
> > + }
>
>
> Apparently this attempts to treat indirect descriptors same as
> direct ones. This is how it is in the split ring, but not in
> the packed one. I think you want two separate functions, for
> direct and indirect.
Okay, I'll do that.
>
> > +
> > + if (desc) {
> > + /* Use a single buffer which doesn't continue */
> > + indirect = true;
> > + /* Set up rest to use this indirect table. */
> > + i = 0;
> > + descs_used = 1;
> > + } else {
> > + indirect = false;
> > + desc = vq->vring_packed.desc;
> > + i = head;
> > + descs_used = total_sg;
> > + }
> > +
> > + if (vq->vq.num_free < descs_used) {
> > + pr_debug("Can't add buf len %i - avail = %i\n",
> > + descs_used, vq->vq.num_free);
> > + /* FIXME: for historical reasons, we force a notify here if
> > + * there are outgoing parts to the buffer. Presumably the
> > + * host should service the ring ASAP. */
> > + if (out_sgs)
> > + vq->notify(&vq->vq);
> > + if (indirect)
> > + kfree(desc);
> > + END_USE(vq);
> > + return -ENOSPC;
> > + }
> > +
> > + id = vq->free_head;
> > + BUG_ON(id == vq->vring_packed.num);
> > +
> > + curr = id;
> > + for (n = 0; n < out_sgs + in_sgs; n++) {
> > + for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > + dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
> > + DMA_TO_DEVICE : DMA_FROM_DEVICE);
> > + if (vring_mapping_error(vq, addr))
> > + goto unmap_release;
> > +
> > + flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
> > + (n < out_sgs ? 0 : VRING_DESC_F_WRITE) |
> > + _VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
> > + _VRING_DESC_F_USED(!vq->avail_wrap_counter));
>
> Spec says:
> The VIRTQ_DESC_F_WRITE flags bit is the only valid flag for descriptors in the
> indirect table.
>
> All this logic isn't needed for indirect.
>
> Also, why re-calculate avail/used flags every time? They only change
> when you wrap around.
Will do that. Thanks.
>
>
> > + if (!indirect && i == head)
> > + head_flags = flags;
> > + else
> > + desc[i].flags = flags;
> > +
> > + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> > + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
> > + i++;
> > + if (!indirect) {
> > + if (vring_use_dma_api(_vq->vdev)) {
> > + vq->desc_state_packed[curr].addr = addr;
> > + vq->desc_state_packed[curr].len =
> > + sg->length;
> > + vq->desc_state_packed[curr].flags =
> > + virtio16_to_cpu(_vq->vdev,
> > + flags);
> > + }
> > + curr = vq->desc_state_packed[curr].next;
> > +
> > + if (i >= vq->vring_packed.num) {
> > + i = 0;
> > + vq->avail_wrap_counter ^= 1;
> > + }
> > + }
> > + }
> > + }
> > +
> > + prev = (i > 0 ? i : vq->vring_packed.num) - 1;
> > + desc[prev].id = cpu_to_virtio16(_vq->vdev, id);
>
> Is it easier to write this out in all descriptors, to avoid the need to
> calculate prev?
Yeah, I'll do that.
>
> > +
> > + /* Last one doesn't continue. */
> > + if (total_sg == 1)
> > + head_flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
> > + else
> > + desc[prev].flags &= cpu_to_virtio16(_vq->vdev,
> > + ~VRING_DESC_F_NEXT);
>
> Wouldn't it be easier to avoid setting VRING_DESC_F_NEXT
> in the first place?
> if (n != in_sgs - 1 && n != out_sgs - 1)
Will this affect the branch prediction?
>
> must be better than writing descriptor an extra time.
Not quite sure about this. I think this descriptor has just
been written, it should still be in the cache.
>
> > +
> > + if (indirect) {
> > + /* Now that the indirect table is filled in, map it. */
> > + dma_addr_t addr = vring_map_single(
> > + vq, desc, total_sg * sizeof(struct vring_packed_desc),
> > + DMA_TO_DEVICE);
> > + if (vring_mapping_error(vq, addr))
> > + goto unmap_release;
> > +
> > + head_flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT |
> > + _VRING_DESC_F_AVAIL(avail_wrap_counter) |
> > + _VRING_DESC_F_USED(!avail_wrap_counter));
> > + vq->vring_packed.desc[head].addr = cpu_to_virtio64(_vq->vdev,
> > + addr);
> > + vq->vring_packed.desc[head].len = cpu_to_virtio32(_vq->vdev,
> > + total_sg * sizeof(struct vring_packed_desc));
> > + vq->vring_packed.desc[head].id = cpu_to_virtio16(_vq->vdev, id);
> > +
> > + if (vring_use_dma_api(_vq->vdev)) {
> > + vq->desc_state_packed[id].addr = addr;
> > + vq->desc_state_packed[id].len = total_sg *
> > + sizeof(struct vring_packed_desc);
> > + vq->desc_state_packed[id].flags =
> > + virtio16_to_cpu(_vq->vdev, head_flags);
> > + }
> > + }
> > +
> > + /* We're using some buffers from the free list. */
> > + vq->vq.num_free -= descs_used;
> > +
> > + /* Update free pointer */
> > + if (indirect) {
> > + n = head + 1;
> > + if (n >= vq->vring_packed.num) {
> > + n = 0;
> > + vq->avail_wrap_counter ^= 1;
> > + }
> > + vq->next_avail_idx = n;
> > + vq->free_head = vq->desc_state_packed[id].next;
> > + } else {
> > + vq->next_avail_idx = i;
> > + vq->free_head = curr;
> > + }
> > +
> > + /* Store token and indirect buffer state. */
> > + vq->desc_state_packed[id].num = descs_used;
> > + vq->desc_state_packed[id].data = data;
> > + if (indirect)
> > + vq->desc_state_packed[id].indir_desc = desc;
> > + else
> > + vq->desc_state_packed[id].indir_desc = ctx;
> > +
> > + /* A driver MUST NOT make the first descriptor in the list
> > + * available before all subsequent descriptors comprising
> > + * the list are made available. */
> > + virtio_wmb(vq->weak_barriers);
> > + vq->vring_packed.desc[head].flags = head_flags;
> > + vq->num_added += descs_used;
> > +
> > + pr_debug("Added buffer head %i to %p\n", head, vq);
> > + END_USE(vq);
> > +
> > + return 0;
> > +
> > +unmap_release:
> > + err_idx = i;
> > + i = head;
> > +
> > + for (n = 0; n < total_sg; n++) {
> > + if (i == err_idx)
> > + break;
> > + vring_unmap_desc_packed(vq, &desc[i]);
> > + i++;
> > + if (!indirect && i >= vq->vring_packed.num)
> > + i = 0;
> > + }
> > +
> > + vq->avail_wrap_counter = avail_wrap_counter;
> > +
> > + if (indirect)
> > + kfree(desc);
> > +
> > + END_USE(vq);
> > return -EIO;
> > }
> >
> > static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > {
> > - return false;
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > + u16 flags;
> > + bool needs_kick;
> > + u32 snapshot;
> > +
> > + START_USE(vq);
> > + /* We need to expose the new flags value before checking notification
> > + * suppressions. */
> > + virtio_mb(vq->weak_barriers);
> > +
> > + snapshot = READ_ONCE(*(u32 *)vq->vring_packed.device);
> > + flags = virtio16_to_cpu(_vq->vdev, (__virtio16)(snapshot >> 16)) & 0x3;
>
> What are all these hard-coded things? Also either we do READ_ONCE
> everywhere or nowhere. Why is this place special? Any why read 32 bit
> if you only want the 16?
Yeah, READ_ONCE() and reading 32bits are not really needed in
this patch. But it's needed in the next patch. I thought it's
not wrong to do this, so I introduced them from the first place.
Just to double check: is the below code (apart from the
hard-coded value and virtio16) from the next patch OK?
"""
snapshot = READ_ONCE(*(u32 *)vq->vring_packed.device);
+ off_wrap = virtio16_to_cpu(_vq->vdev, (__virtio16)(snapshot & 0xffff));
flags = virtio16_to_cpu(_vq->vdev, (__virtio16)(snapshot >> 16)) & 0x3;
"""
>
> And doesn't sparse complain about cast to __virtio16?
I'll give it a try in the next version.
>
> > +
> > +#ifdef DEBUG
> > + if (vq->last_add_time_valid) {
> > + WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
> > + vq->last_add_time)) > 100);
> > + }
> > + vq->last_add_time_valid = false;
> > +#endif
> > +
> > + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > + END_USE(vq);
> > + return needs_kick;
> > +}
> > +
> > +static void detach_buf_packed(struct vring_virtqueue *vq,
> > + unsigned int id, void **ctx)
> > +{
> > + struct vring_desc_state_packed *state = NULL;
> > + struct vring_packed_desc *desc;
> > + unsigned int curr, i;
> > +
> > + /* Clear data ptr. */
> > + vq->desc_state_packed[id].data = NULL;
> > +
> > + curr = id;
> > + for (i = 0; i < vq->desc_state_packed[id].num; i++) {
> > + state = &vq->desc_state_packed[curr];
> > + vring_unmap_state_packed(vq, state);
> > + curr = state->next;
> > + }
> > +
> > + BUG_ON(state == NULL);
> > + vq->vq.num_free += vq->desc_state_packed[id].num;
> > + state->next = vq->free_head;
> > + vq->free_head = id;
> > +
> > + if (vq->indirect) {
> > + u32 len;
> > +
> > + /* Free the indirect table, if any, now that it's unmapped. */
> > + desc = vq->desc_state_packed[id].indir_desc;
> > + if (!desc)
> > + return;
> > +
> > + if (vring_use_dma_api(vq->vq.vdev)) {
> > + len = vq->desc_state_packed[id].len;
> > + for (i = 0; i < len / sizeof(struct vring_packed_desc);
> > + i++)
> > + vring_unmap_desc_packed(vq, &desc[i]);
> > + }
> > + kfree(desc);
> > + vq->desc_state_packed[id].indir_desc = NULL;
> > + } else if (ctx) {
> > + *ctx = vq->desc_state_packed[id].indir_desc;
> > + }
> > +}
> > +
> > +static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
> > + u16 idx, bool used_wrap_counter)
> > +{
> > + u16 flags;
> > + bool avail, used;
> > +
> > + flags = virtio16_to_cpu(vq->vq.vdev,
> > + vq->vring_packed.desc[idx].flags);
> > + avail = !!(flags & VRING_DESC_F_AVAIL);
> > + used = !!(flags & VRING_DESC_F_USED);
> > +
> > + return avail == used && used == used_wrap_counter;
>
> I think that you don't need to look at avail flag to detect a used
> descriptor. The reason device writes it is to avoid confusing
> *device* next time descriptor wraps.
Okay, I'll just check the used flag.
>
> > }
> >
> > static inline bool more_used_packed(const struct vring_virtqueue *vq)
> > {
> > - return false;
> > + return is_used_desc_packed(vq, vq->last_used_idx,
> > + vq->used_wrap_counter);
> > }
> >
> > static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > unsigned int *len,
> > void **ctx)
> > {
> > - return NULL;
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > + u16 last_used, id;
> > + void *ret;
> > +
> > + START_USE(vq);
> > +
> > + if (unlikely(vq->broken)) {
> > + END_USE(vq);
> > + return NULL;
> > + }
> > +
> > + if (!more_used_packed(vq)) {
> > + pr_debug("No more buffers in queue\n");
> > + END_USE(vq);
> > + return NULL;
> > + }
> > +
> > + /* Only get used elements after they have been exposed by host. */
> > + virtio_rmb(vq->weak_barriers);
> > +
> > + last_used = vq->last_used_idx;
> > + id = virtio16_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].id);
> > + *len = virtio32_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].len);
> > +
> > + if (unlikely(id >= vq->vring_packed.num)) {
> > + BAD_RING(vq, "id %u out of range\n", id);
> > + return NULL;
> > + }
> > + if (unlikely(!vq->desc_state_packed[id].data)) {
> > + BAD_RING(vq, "id %u is not a head!\n", id);
> > + return NULL;
> > + }
> > +
> > + vq->last_used_idx += vq->desc_state_packed[id].num;
> > + if (vq->last_used_idx >= vq->vring_packed.num) {
> > + vq->last_used_idx -= vq->vring_packed.num;
> > + vq->used_wrap_counter ^= 1;
> > + }
> > +
> > + /* detach_buf_packed clears data, so grab it now. */
> > + ret = vq->desc_state_packed[id].data;
> > + detach_buf_packed(vq, id, ctx);
> > +
> > +#ifdef DEBUG
> > + vq->last_add_time_valid = false;
> > +#endif
> > +
> > + END_USE(vq);
> > + return ret;
> > }
> >
> > static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
> > {
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > + if (vq->event_flags_shadow != VRING_EVENT_F_DISABLE) {
> > + vq->event_flags_shadow = VRING_EVENT_F_DISABLE;
> > + vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > + vq->event_flags_shadow);
> > + }
> > }
> >
> > static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > {
> > - return 0;
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > + START_USE(vq);
> > +
> > + /* We optimistically turn back on interrupts, then check if there was
> > + * more to do. */
> > +
> > + if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > + vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > + vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > + vq->event_flags_shadow);
> > + }
> > +
> > + END_USE(vq);
> > + return vq->last_used_idx | ((u16)vq->used_wrap_counter << 15);
> > }
> >
> > -static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > +static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned off_wrap)
> > {
> > - return false;
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > + bool wrap_counter;
> > + u16 used_idx;
> > +
> > + wrap_counter = off_wrap >> 15;
> > + used_idx = off_wrap & ~(1 << 15);
> > +
> > + return is_used_desc_packed(vq, used_idx, wrap_counter);
>
> These >> 15 << 15 all over the place duplicate info.
> Pls put 15 in the header.
Sure.
>
> Also can you maintain the counters properly shifted?
> Then just use them.
Then, we may need to maintain both of the shifted wrapper
counters and un-shifted wrapper counters at the same time.
>
>
> > }
> >
> > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > {
> > - return false;
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > + START_USE(vq);
> > +
> > + /* We optimistically turn back on interrupts, then check if there was
> > + * more to do. */
> > +
> > + if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > + vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > + vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > + vq->event_flags_shadow);
> > + /* We need to enable interrupts first before re-checking
> > + * for more used buffers. */
> > + virtio_mb(vq->weak_barriers);
> > + }
> > +
> > + if (more_used_packed(vq)) {
> > + END_USE(vq);
> > + return false;
> > + }
> > +
> > + END_USE(vq);
> > + return true;
> > }
> >
> > static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
> > {
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > + unsigned int i;
> > + void *buf;
> > +
> > + START_USE(vq);
> > +
> > + for (i = 0; i < vq->vring_packed.num; i++) {
> > + if (!vq->desc_state_packed[i].data)
> > + continue;
> > + /* detach_buf clears data, so grab it now. */
> > + buf = vq->desc_state_packed[i].data;
> > + detach_buf_packed(vq, i, NULL);
> > + END_USE(vq);
> > + return buf;
> > + }
> > + /* That should have freed everything. */
> > + BUG_ON(vq->vq.num_free != vq->vring_packed.num);
> > +
> > + END_USE(vq);
> > return NULL;
> > }
> >
> > @@ -1083,6 +1559,9 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > + /* We need to enable interrupts first before re-checking
> > + * for more used buffers. */
> > + virtio_mb(vq->weak_barriers);
> > return vq->packed ? virtqueue_poll_packed(_vq, last_used_idx) :
> > virtqueue_poll_split(_vq, last_used_idx);
> > }
>
> Possible optimization for when dma API is in use:
Got it. Will give it a try!
Best regards,
Tiwei Bie
>
> --->
>
> dma: detecting nop unmap
>
> drivers need to maintain the dma address for unmap purposes,
> but these cycles are wasted when unmap callback is not
> defined. Add an API for drivers to check that and avoid
> unmap completely. Debug builds still have unmap.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>
> ----
>
> diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
> index a785f2507159..38b2c27c8449 100644
> --- a/include/linux/dma-debug.h
> +++ b/include/linux/dma-debug.h
> @@ -42,6 +42,11 @@ extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
> extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
> size_t size, int direction, bool map_single);
>
> +static inline bool has_debug_dma_unmap(struct device *dev)
> +{
> + return true;
> +}
> +
> extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
> int nents, int mapped_ents, int direction);
>
> @@ -121,6 +126,11 @@ static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
> {
> }
>
> +static inline bool has_debug_dma_unmap(struct device *dev)
> +{
> + return false;
> +}
> +
> static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
> int nents, int mapped_ents, int direction)
> {
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 1db6a6b46d0d..656f3e518166 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -241,6 +241,14 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
> return addr;
> }
>
> +static inline bool has_dma_unmap(struct device *dev)
> +{
> + const struct dma_map_ops *ops = get_dma_ops(dev);
> +
> + return ops->unmap_page || ops->unmap_sg || ops->unmap_resource ||
> + has_dma_unmap(dev);
> +}
> +
> static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
> size_t size,
> enum dma_data_direction dir,
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox