Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 4/5] bonding: fix LACP PDU not sent on slave port sometimes
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev; +Cc: Andy Gospodarek, Satish Ashok, Jonathan Toppins
In-Reply-To: <1421423848-414-1-git-send-email-jtoppins@cumulusnetworks.com>

From: Satish Ashok <sashok@cumulusnetworks.com>

When a slave is added to a bond while it is not in full duplex mode, the
AD_PORT_LACP_ENABLED flag is cleared, and as a result LACP PDUs are not
sent on that slave. When the duplex is changed to full, the flag needs to
be set so that LACP PDUs are sent.

Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Satish Ashok <sashok@cumulusnetworks.com>
Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
---
 drivers/net/bonding/bond_3ad.c |   11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 52a8772..43bb0b0 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2224,8 +2224,10 @@ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave,
 		switch (lacpdu->subtype) {
 		case AD_TYPE_LACPDU:
 			ret = RX_HANDLER_CONSUMED;
-			netdev_dbg(slave->bond->dev, "Received LACPDU on port %d\n",
-				   port->actor_port_number);
+			netdev_dbg(slave->bond->dev,
+				   "Received LACPDU on port %d slave %s\n",
+				   port->actor_port_number,
+				   slave->dev->name);
 			/* Protect against concurrent state machines */
 			spin_lock(&slave->bond->mode_lock);
 			ad_rx_machine(lacpdu, port);
@@ -2317,7 +2319,10 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave)
 	port->actor_admin_port_key &= ~AD_DUPLEX_KEY_MASKS;
 	port->actor_oper_port_key = port->actor_admin_port_key |=
 		__get_duplex(port);
-	netdev_dbg(slave->bond->dev, "Port %d changed duplex\n", port->actor_port_number);
+	netdev_dbg(slave->bond->dev, "Port %d slave %s changed duplex\n",
+		   port->actor_port_number, slave->dev->name);
+	if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)
+		port->sm_vars |= AD_PORT_LACP_ENABLED;
 	/* there is no need to reselect a new aggregator, just signal the
 	 * state machines to reinitialize
 	 */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next 3/5] bonding: fix incorrect lacp mux state when agg not active
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev; +Cc: Andy Gospodarek, Wilson Kok, Jonathan Toppins
In-Reply-To: <1421423848-414-1-git-send-email-jtoppins@cumulusnetworks.com>

From: Wilson Kok <wkok@cumulusnetworks.com>

This patch attempts to fix the following problems when an actor or
partner's aggregator is not active:
    1. a slave's lacp port state is marked as AD_STATE_SYNCHRONIZATION
       even if it is attached to an inactive aggregator. LACP advertises
       this state to the partner, making the partner think he can move
       into COLLECTING_DISTRIBUTING state even though this link will not
       pass traffic on the local side

    2. a slave goes into COLLECTING_DISTRIBUTING state without checking
       if the aggregator is actually active

    3. when in COLLECTING_DISTRIBUTING state, the partner parameters may
       change, e.g. the partner_oper_port_state.SYNCHRONIZATION. The
       local mux machine is not reacting to the change and continues to
       keep the slave and bond up

    4. when a bond slave leaves an inactive aggregator and joins an
       active aggregator, the actor oper port state needs to be updated
       to the SYNC state.

Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Wilson Kok <wkok@cumulusnetworks.com>
Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
---
 drivers/net/bonding/bond_3ad.c |   44 ++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index e9b706f..52a8772 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -471,10 +471,13 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port)
 		 * and the port is matched
 		 */
 		if ((port->sm_vars & AD_PORT_MATCHED)
-		    && (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION))
+			&& (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) {
 			partner->port_state |= AD_STATE_SYNCHRONIZATION;
-		else
+			pr_debug("%s partner sync=1\n", port->slave->dev->name);
+		} else {
 			partner->port_state &= ~AD_STATE_SYNCHRONIZATION;
+			pr_debug("%s partner sync=0\n", port->slave->dev->name);
+		}
 	}
 }
 
@@ -729,6 +732,8 @@ static inline void __update_lacpdu_from_port(struct port *port)
 	lacpdu->actor_port_priority = htons(port->actor_port_priority);
 	lacpdu->actor_port = htons(port->actor_port_number);
 	lacpdu->actor_state = port->actor_oper_port_state;
+	pr_debug("update lacpdu: %s, actor port state %x\n",
+		 port->slave->dev->name, port->actor_oper_port_state);
 
 	/* lacpdu->reserved_3_1              initialized
 	 * lacpdu->tlv_type_partner_info     initialized
@@ -901,7 +906,9 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 			if ((port->sm_vars & AD_PORT_SELECTED) &&
 			    (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) &&
 			    !__check_agg_selection_timer(port)) {
-				port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING;
+				if (port->aggregator->is_active)
+					port->sm_mux_state =
+					    AD_MUX_COLLECTING_DISTRIBUTING;
 			} else if (!(port->sm_vars & AD_PORT_SELECTED) ||
 				   (port->sm_vars & AD_PORT_STANDBY)) {
 				/* if UNSELECTED or STANDBY */
@@ -913,12 +920,18 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 				 */
 				__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
 				port->sm_mux_state = AD_MUX_DETACHED;
+			} else if (port->aggregator->is_active) {
+				port->actor_oper_port_state |=
+				    AD_STATE_SYNCHRONIZATION;
 			}
 			break;
 		case AD_MUX_COLLECTING_DISTRIBUTING:
 			if (!(port->sm_vars & AD_PORT_SELECTED) ||
 			    (port->sm_vars & AD_PORT_STANDBY) ||
-			    !(port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION)) {
+			    !(port->partner_oper.port_state &
+				AD_STATE_SYNCHRONIZATION) ||
+			    !(port->actor_oper_port_state &
+				AD_STATE_SYNCHRONIZATION)) {
 				port->sm_mux_state = AD_MUX_ATTACHED;
 			} else {
 				/* if port state hasn't changed make
@@ -940,8 +953,10 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 
 	/* check if the state machine was changed */
 	if (port->sm_mux_state != last_state) {
-		pr_debug("Mux Machine: Port=%d, Last State=%d, Curr State=%d\n",
-			 port->actor_port_number, last_state,
+		pr_debug("Mux Machine: Port=%d (%s), Last State=%d, Curr State=%d\n",
+			 port->actor_port_number,
+			 port->slave->dev->name,
+			 last_state,
 			 port->sm_mux_state);
 		switch (port->sm_mux_state) {
 		case AD_MUX_DETACHED:
@@ -956,7 +971,12 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 			port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0);
 			break;
 		case AD_MUX_ATTACHED:
-			port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
+			if (port->aggregator->is_active)
+				port->actor_oper_port_state |=
+				    AD_STATE_SYNCHRONIZATION;
+			else
+				port->actor_oper_port_state &=
+				    ~AD_STATE_SYNCHRONIZATION;
 			port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
 			port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
 			ad_disable_collecting_distributing(port,
@@ -966,6 +986,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 		case AD_MUX_COLLECTING_DISTRIBUTING:
 			port->actor_oper_port_state |= AD_STATE_COLLECTING;
 			port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
+			port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
 			ad_enable_collecting_distributing(port,
 							  update_slave_arr);
 			port->ntt = true;
@@ -1047,8 +1068,10 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
 
 	/* check if the State machine was changed or new lacpdu arrived */
 	if ((port->sm_rx_state != last_state) || (lacpdu)) {
-		pr_debug("Rx Machine: Port=%d, Last State=%d, Curr State=%d\n",
-			 port->actor_port_number, last_state,
+		pr_debug("Rx Machine: Port=%d (%s), Last State=%d, Curr State=%d\n",
+			 port->actor_port_number,
+			 port->slave->dev->name,
+			 last_state,
 			 port->sm_rx_state);
 		switch (port->sm_rx_state) {
 		case AD_RX_INITIALIZE:
@@ -1397,6 +1420,9 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
 
 	aggregator = __get_first_agg(port);
 	ad_agg_selection_logic(aggregator, update_slave_arr);
+
+	if (!port->aggregator->is_active)
+		port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
 }
 
 /* Decide if "agg" is a better choice for the new active aggregator that
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next 2/5] bonding: fix bond_open() don't always set slave active flag
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev; +Cc: Andy Gospodarek, Wilson Kok, Jonathan Toppins
In-Reply-To: <1421423848-414-1-git-send-email-jtoppins@cumulusnetworks.com>

From: Wilson Kok <wkok@cumulusnetworks.com>

Mode 802.3ad, fix incorrect bond slave active state when slave is not in
active aggregator. During bond_open(), the bonding driver always sets
the slave active flag to true if the bond is not in active-backup, alb,
or tlb modes. Bonding should let the aggregator selection logic set the
active flag when in 802.3ad mode.

Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Wilson Kok <wkok@cumulusnetworks.com>
Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
---
 drivers/net/bonding/bond_main.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 02ffedb..c475d90 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3066,7 +3066,7 @@ static int bond_open(struct net_device *bond_dev)
 			    slave != rcu_access_pointer(bond->curr_active_slave)) {
 				bond_set_slave_inactive_flags(slave,
 							      BOND_SLAVE_NOTIFY_NOW);
-			} else {
+			} else if (BOND_MODE(bond) != BOND_MODE_8023AD) {
 				bond_set_slave_active_flags(slave,
 							    BOND_SLAVE_NOTIFY_NOW);
 			}
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next 1/5] bonding: keep bond interface carrier off until at least one active member
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev; +Cc: Scott Feldman, Andy Gospodarek, Jonathan Toppins
In-Reply-To: <1421423848-414-1-git-send-email-jtoppins@cumulusnetworks.com>

From: Scott Feldman <sfeldma@cumulusnetworks.com>

Bonding driver parameter min_links is now used to signal upper-level
protocols of bond status. The way it works is if the total number of
active members in slaves drops below min_links, the bond link carrier
will go down, signaling upper levels that bond is inactive.  When active
members return to >= min_links, bond link carrier will go up (RUNNING),
and protocols can resume.  When bond is carrier down, member ports are
in stp fwd state blocked (rather than normal disabled state), so
low-level ctrl protocols (LACP) can still get in and be processed by
bonding driver.

LACP will still do its job while bond is carrier off, and if bond members
become active, bond carrier will be turned back on, signaling higher-level
protocols that bond is viable.

Suggested setting of min_links is 1, rather than the default of zero.
Using min_links=1 says that at least 1 slave must be active within bond
for bond to be carrier on.

Finally, when min_links bonding option is changed update carrier status.

Cc: Scott Feldman <sfeldma@gmail.com>
Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
---
 drivers/net/bonding/bond_3ad.c     |   18 ++++++++++++++----
 drivers/net/bonding/bond_main.c    |    2 +-
 drivers/net/bonding/bond_options.c |    1 +
 include/net/bonding.h              |    1 +
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 8baa87d..e9b706f 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -189,6 +189,7 @@ static inline int __agg_has_partner(struct aggregator *agg)
 static inline void __disable_port(struct port *port)
 {
 	bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER);
+	bond_3ad_set_carrier(port->slave->bond);
 }
 
 /**
@@ -199,8 +200,10 @@ static inline void __enable_port(struct port *port)
 {
 	struct slave *slave = port->slave;
 
-	if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave))
+	if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave)) {
 		bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER);
+		bond_3ad_set_carrier(slave->bond);
+	}
 }
 
 /**
@@ -2372,8 +2375,10 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
 int bond_3ad_set_carrier(struct bonding *bond)
 {
 	struct aggregator *active;
-	struct slave *first_slave;
+	struct slave *first_slave, *slave;
+	struct list_head *iter;
 	int ret = 1;
+	int active_slaves = 0;
 
 	rcu_read_lock();
 	first_slave = bond_first_slave_rcu(bond);
@@ -2381,10 +2386,15 @@ int bond_3ad_set_carrier(struct bonding *bond)
 		ret = 0;
 		goto out;
 	}
+
+	bond_for_each_slave_rcu(bond, slave, iter)
+		if (SLAVE_AD_INFO(slave)->aggregator.is_active)
+			active_slaves++;
+
 	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator));
-	if (active) {
+	if (active && __agg_has_partner(active)) {
 		/* are enough slaves available to consider link up? */
-		if (active->num_of_ports < bond->params.min_links) {
+		if (active_slaves < bond->params.min_links) {
 			if (netif_carrier_ok(bond->dev)) {
 				netif_carrier_off(bond->dev);
 				goto out;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 0dceba1..02ffedb 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -334,7 +334,7 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
  *
  * Returns zero if carrier state does not change, nonzero if it does.
  */
-static int bond_set_carrier(struct bonding *bond)
+int bond_set_carrier(struct bonding *bond)
 {
 	struct list_head *iter;
 	struct slave *slave;
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 9bd538d4..4df2894 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1181,6 +1181,7 @@ static int bond_option_min_links_set(struct bonding *bond,
 	netdev_info(bond->dev, "Setting min links value to %llu\n",
 		    newval->value);
 	bond->params.min_links = newval->value;
+	bond_set_carrier(bond);
 
 	return 0;
 }
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 983a94b..29f53ea 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -525,6 +525,7 @@ void bond_sysfs_slave_del(struct slave *slave);
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev);
 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
 u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb);
+int bond_set_carrier(struct bonding *bond);
 void bond_select_active_slave(struct bonding *bond);
 void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
 void bond_create_debugfs(void);
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next 0/5] bonding: various 802.3ad fixes
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev

This patch series is a forward porting of patches we (Cumulus) are shipping
in our 3.2 series kernels. These fixes attempt to make 802.3ad bonding mode
more predictable in certain state machine transitions in addition to enhancing
802.3ad bond carrier determination based on actual number of peered ports plus
if the bond has an active aggregator. Specific notes are contained within each
patch.

For this patch series there are no userspace facing changes, a diff between
the modinfo output showed no difference. However, there are behavioral
facing changes, primarily in the bond carrier state. Please make sure to
review carefully.

Jonathan Toppins (1):
  bonding: cleanup and remove dead code

Satish Ashok (1):
  bonding: fix LACP PDU not sent on slave port sometimes

Scott Feldman (1):
  bonding: keep bond interface carrier off until at least one active
    member

Wilson Kok (2):
  bonding: fix bond_open() don't always set slave active flag
  bonding: fix incorrect lacp mux state when agg not active

 drivers/net/bonding/bond_3ad.c     |   73 ++++++++++++++++++++++++++++--------
 drivers/net/bonding/bond_main.c    |    6 +--
 drivers/net/bonding/bond_options.c |    1 +
 include/net/bond_3ad.h             |    1 -
 include/net/bonding.h              |    1 +
 5 files changed, 62 insertions(+), 20 deletions(-)

-- 
1.7.10.4

^ permalink raw reply

* Re: [PATCH 1/9] rhashtable: Do hashing inside of rhashtable_lookup_compare()
From: Patrick McHardy @ 2015-01-16 15:37 UTC (permalink / raw)
  To: Thomas Graf
  Cc: davem, netdev, linux-kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <d579f486c3d4a88744dffc23a794601b025b41f4.1420230585.git.tgraf@suug.ch>

On 02.01, Thomas Graf wrote:
> Hash the key inside of rhashtable_lookup_compare() like
> rhashtable_lookup() does. This allows to simplify the hashing
> functions and keep them private.

One more question:

> diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
> index 1e316ce..614ee09 100644
> --- a/net/netfilter/nft_hash.c
> +++ b/net/netfilter/nft_hash.c
> @@ -94,28 +94,40 @@ static void nft_hash_remove(const struct nft_set *set,
>  	kfree(he);
>  }
>  
> +struct nft_compare_arg {
> +	const struct nft_set *set;
> +	struct nft_set_elem *elem;
> +};
> +
> +static bool nft_hash_compare(void *ptr, void *arg)
> +{
> +	struct nft_hash_elem *he = ptr;
> +	struct nft_compare_arg *x = arg;
> +
> +	if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) {
> +		x->elem->cookie = &he->node;
> +		x->elem->flags = 0;
> +		if (x->set->flags & NFT_SET_MAP)
> +			nft_data_copy(&x->elem->data, he->data);

Is there any reason why we need to perform the assignments in the
compare function? The reason why I'm asking is because to add
timeout support, I need another compare function for nft_hash_lookup()
and I'd prefer to use a single one for both cases.

> +
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
>  static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
>  {
>  	const struct rhashtable *priv = nft_set_priv(set);
> -	const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv);
> -	struct rhash_head __rcu * const *pprev;
> -	struct nft_hash_elem *he;
> -	u32 h;
> -
> -	h = rhashtable_hashfn(priv, &elem->key, set->klen);
> -	pprev = &tbl->buckets[h];
> -	rht_for_each_entry_rcu(he, tbl->buckets[h], node) {
> -		if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
> -			pprev = &he->node.next;
> -			continue;
> -		}
> +	struct nft_compare_arg arg = {
> +		.set = set,
> +		.elem = elem,
> +	};
>  
> -		elem->cookie = (void *)pprev;
> -		elem->flags = 0;
> -		if (set->flags & NFT_SET_MAP)
> -			nft_data_copy(&elem->data, he->data);
> +	if (rhashtable_lookup_compare(priv, &elem->key,
> +				      &nft_hash_compare, &arg))
>  		return 0;
> -	}
> +
>  	return -ENOENT;
>  }
>  

^ permalink raw reply

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Patrick McHardy @ 2015-01-16 15:34 UTC (permalink / raw)
  To: Thomas Graf
  Cc: davem, netdev, kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <75db38bc9313a55cf02a8c36a3376c32b691e5d9.1418647641.git.tgraf@suug.ch>

On 15.12, Thomas Graf wrote:
> The patch also defers expansion and shrinking to a worker queue which
> allows insertion and removal from atomic context. Insertions and
> deletions may occur in parallel to it and are only held up briefly
> while the particular bucket is linked or unzipped.
>
> Mutations of the bucket table pointer is protected by a new mutex, read
> access is RCU protected.
> 
> In the event of an expansion or shrinking, the new bucket table allocated
> is exposed as a so called future table as soon as the resize process
> starts.  Lookups, deletions, and insertions will briefly use both tables.
> The future table becomes the main table after an RCU grace period and
> initial linking of the old to the new table was performed. Optimization
> of the chains to make use of the new number of buckets follows only the
> new table is in use.

AFAICT nft_hash_walk() will miss new entries during this period.
Am I missing anything here?

^ permalink raw reply

* Re: [PATCH tip 0/9] tracing: attach eBPF programs to tracepoints/syscalls/kprobe
From: Steven Rostedt @ 2015-01-16 15:02 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo, Jiri Olsa,
	David S. Miller, Daniel Borkmann, Hannes Frederic Sowa,
	Brendan Gregg, linux-api-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1421381770-4866-1-git-send-email-ast-uqk4Ao+rVK5Wk0Htik3J/w@public.gmane.org>

On Thu, 15 Jan 2015 20:16:01 -0800
Alexei Starovoitov <ast-uqk4Ao+rVK5Wk0Htik3J/w@public.gmane.org> wrote:

> Hi Ingo, Steven,
> 
> This patch set is based on tip/master.

Note, the tracing code isn't maintained in tip/master, but perf code is.

Using the latest 3.19-rc is probably sufficient for now.

Do you have a git repo somewhere that I can look at? It makes it easier
than loading in 9 patches ;-)

> It adds ability to attach eBPF programs to tracepoints, syscalls and kprobes.
> 
> Mechanism of attaching:
> - load program via bpf() syscall and receive program_fd
> - event_fd = open("/sys/kernel/debug/tracing/events/.../filter")
> - write 'bpf-123' to event_fd where 123 is program_fd
> - program will be attached to particular event and event automatically enabled
> - close(event_fd) will detach bpf program from event and event disabled
> 
> Program attach point and input arguments:
> - programs attached to kprobes receive 'struct pt_regs *' as an input.
>   See tracex4_kern.c that demonstrates how users can write a C program like:
>   SEC("events/kprobes/sys_write")
>   int bpf_prog4(struct pt_regs *regs)
>   {
>      long write_size = regs->dx; 
>      // here user need to know the proto of sys_write() from kernel
>      // sources and x64 calling convention to know that register $rdx
>      // contains 3rd argument to sys_write() which is 'size_t count'
> 
>   it's obviously architecture dependent, but allows building sophisticated
>   user tools on top, that can see from debug info of vmlinux which variables
>   are in which registers or stack locations and fetch it from there.
>   'perf probe' can potentially use this hook to generate programs in user space
>   and insert them instead of letting kernel parse string during kprobe creation.
> 
> - programs attached to tracepoints and syscalls receive 'struct bpf_context *':
>   u64 arg1, arg2, ..., arg6;
>   for syscalls they match syscall arguments.
>   for tracepoints these args match arguments passed to tracepoint.
>   For example:
>   trace_sched_migrate_task(p, new_cpu); from sched/core.c
>   arg1 <- p        which is 'struct task_struct *'
>   arg2 <- new_cpu  which is 'unsigned int'
>   arg3..arg6 = 0
>   the program can use bpf_fetch_u8/16/32/64/ptr() helpers to walk 'task_struct'
>   or any other kernel data structures.
>   These helpers are using probe_kernel_read() similar to 'perf probe' which is
>   not 100% safe in both cases, but good enough.
>   To access task_struct's pid inside 'sched_migrate_task' tracepoint
>   the program can do:
>   struct task_struct *task = (struct task_struct *)ctx->arg1;
>   u32 pid = bpf_fetch_u32(&task->pid);
>   Since struct layout is kernel configuration specific such programs are not
>   portable and require access to kernel headers to be compiled,
>   but in this case we don't need debug info.
>   llvm with bpf backend will statically compute task->pid offset as a constant
>   based on kernel headers only.
>   The example of this arbitrary pointer walking is tracex1_kern.c
>   which does skb->dev->name == "lo" filtering.
> 
> In all cases the programs are called before trace buffer is allocated to
> minimize the overhead, since we want to filter huge number of events, but
> buffer alloc/free and argument copy for every event is too costly.

For syscalls this is fine as the parameters are usually set. But
there's a lot of tracepoints that we need to know the result of the
copied data to decide to filter or not, where the result happens at the
TP_fast_assign() part which requires allocating the buffers.

Maybe we should have a way to do the program before and/or after the
buffering depending on what to filter on. There's no way to know what
the parameters of the tracepoint are without looking at the source.



> Theoretically we can invoke programs after buffer is allocated, but it
> doesn't seem needed, since above approach is faster and achieves the same.

Again, for syscalls it may not be a problem, but for other tracepoints,
I'm not sure we can do that. How do you handle sched_switch for
example? The tracepoint only gets two pointers to task structs, you
need to then dereference them to get the pid, prio, state and other
data.

> 
> Note, tracepoint/syscall and kprobe programs are two different types:
> BPF_PROG_TYPE_TRACING_FILTER and BPF_PROG_TYPE_KPROBE_FILTER,
> since they expect different input.
> Both use the same set of helper functions:
> - map access (lookup/update/delete)
> - fetch (probe_kernel_read wrappers)
> - memcmp (probe_kernel_read + memcmp)
> - dump_stack
> - trace_printk
> The last two are mainly to debug the programs and to print data for user
> space consumptions.

I have to look at the code, but currently trace_printk() isn't made to
be used in production systems.

> 
> Portability:
> - kprobe programs are architecture dependent and need user scripting
>   language like ktap/stap/dtrace/perf that will dynamically generate
>   them based on debug info in vmlinux
> - tracepoint programs are architecture independent, but if arbitrary pointer
>   walking (with fetch() helpers) is used, they need data struct layout to match.
>   Debug info is not necessary

If the program runs after the buffers are allocated, it could still be
architecture independent because ftrace gives the information on how to
retrieve the fields.

One last thing. If the ebpf is used for anything but filtering, it
should go into the trigger file. The filtering is only a way to say if
the event should be recorded or not. But the trigger could do something
else (a printk, a stacktrace, etc).

-- Steve


> - for networking use case we need to access 'struct sk_buff' fields in portable
>   way (user space needs to fetch packet length without knowing skb->len offset),
>   so for some frequently used data structures we will add helper functions
>   or pseudo instructions to access them. I've hacked few ways specifically
>   for skb, but abandoned them in favor of more generic type/field infra.
>   That work is still wip. Not part of this set.
>   Once it's ready tracepoint programs that access common data structs
>   will be kernel independent.
> 
> Program return value:
> - programs return 0 to discard an event
> - and return non-zero to proceed with event (allocate trace buffer, copy
>   arguments there and print it eventually in trace_pipe in traditional way)
> 
> Examples:
> - dropmon.c - simple kfree_skb() accounting in eBPF assembler, similar
>   to dropmon tool
> - tracex1_kern.c - does net/netif_receive_skb event filtering
>   for skb->dev->name == "lo" condition
> - tracex2_kern.c - same kfree_skb() accounting like dropmon, but now in C
>   plus computes histogram of all write sizes from sys_write syscall
>   and prints the histogram in userspace
> - tracex3_kern.c - most sophisticated example that computes IO latency
>   between block/block_rq_issue and block/block_rq_complete events
>   and prints 'heatmap' using gray shades of text terminal.
>   Useful to analyze disk performance.
> - tracex4_kern.c - computes histogram of write sizes from sys_write syscall
>   using kprobe mechanism instead of syscall. Since kprobe is optimized into
>   ftrace the overhead of instrumentation is smaller than in example 2.
> 
> The user space tools like ktap/dtrace/systemtap/perf that have access
> to debug info would probably want to use kprobe attachment point, since kprobe
> can be inserted anywhere and all registers are available in the program.
> tracepoint attachments are useful without debug info, so standalone tools
> like iosnoop will use them.
> 
> The main difference vs existing perf_probe/ftrace infra is in kernel aggregation
> and conditional walking of arbitrary data structures.
> 
> Thanks!
> 
> Alexei Starovoitov (9):
>   tracing: attach eBPF programs to tracepoints and syscalls
>   tracing: allow eBPF programs to call bpf_printk()
>   tracing: allow eBPF programs to call ktime_get_ns()
>   samples: bpf: simple tracing example in eBPF assembler
>   samples: bpf: simple tracing example in C
>   samples: bpf: counting example for kfree_skb tracepoint and write
>     syscall
>   samples: bpf: IO latency analysis (iosnoop/heatmap)
>   tracing: attach eBPF programs to kprobe/kretprobe
>   samples: bpf: simple kprobe example
> 
>  include/linux/ftrace_event.h       |    6 +
>  include/trace/bpf_trace.h          |   25 ++++
>  include/trace/ftrace.h             |   30 +++++
>  include/uapi/linux/bpf.h           |   11 ++
>  kernel/trace/Kconfig               |    1 +
>  kernel/trace/Makefile              |    1 +
>  kernel/trace/bpf_trace.c           |  250 ++++++++++++++++++++++++++++++++++++
>  kernel/trace/trace.h               |    3 +
>  kernel/trace/trace_events.c        |   41 +++++-
>  kernel/trace/trace_events_filter.c |   80 +++++++++++-
>  kernel/trace/trace_kprobe.c        |   11 +-
>  kernel/trace/trace_syscalls.c      |   31 +++++
>  samples/bpf/Makefile               |   18 +++
>  samples/bpf/bpf_helpers.h          |   18 +++
>  samples/bpf/bpf_load.c             |   62 ++++++++-
>  samples/bpf/bpf_load.h             |    3 +
>  samples/bpf/dropmon.c              |  129 +++++++++++++++++++
>  samples/bpf/tracex1_kern.c         |   28 ++++
>  samples/bpf/tracex1_user.c         |   24 ++++
>  samples/bpf/tracex2_kern.c         |   71 ++++++++++
>  samples/bpf/tracex2_user.c         |   95 ++++++++++++++
>  samples/bpf/tracex3_kern.c         |   96 ++++++++++++++
>  samples/bpf/tracex3_user.c         |  146 +++++++++++++++++++++
>  samples/bpf/tracex4_kern.c         |   36 ++++++
>  samples/bpf/tracex4_user.c         |   83 ++++++++++++
>  25 files changed, 1290 insertions(+), 9 deletions(-)
>  create mode 100644 include/trace/bpf_trace.h
>  create mode 100644 kernel/trace/bpf_trace.c
>  create mode 100644 samples/bpf/dropmon.c
>  create mode 100644 samples/bpf/tracex1_kern.c
>  create mode 100644 samples/bpf/tracex1_user.c
>  create mode 100644 samples/bpf/tracex2_kern.c
>  create mode 100644 samples/bpf/tracex2_user.c
>  create mode 100644 samples/bpf/tracex3_kern.c
>  create mode 100644 samples/bpf/tracex3_user.c
>  create mode 100644 samples/bpf/tracex4_kern.c
>  create mode 100644 samples/bpf/tracex4_user.c
> 

^ permalink raw reply

* Re: [net-next PATCH v3 1/1] atm: remove deprecated use of pci api
From: Quentin Lambert @ 2015-01-16 14:54 UTC (permalink / raw)
  To: chas williams - CONTRACTOR
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20150116095244.40249284@thirdoffive.cmf.nrl.navy.mil>


On 16/01/2015 15:52, chas williams - CONTRACTOR wrote:
> On Fri, 16 Jan 2015 15:10:25 +0100
> Quentin Lambert <lambert.quentin@gmail.com> wrote:
>
>
>>> -    u32 dma_addr = pci_map_single((struct pci_dev*)fore200e->bus_dev, virt_addr, size, direction);
>>> +    u32 dma_addr = dma_map_single(&((struct pci_dev *) fore200e->bus_dev)->dev, virt_addr, size, direction);
>>>    
>>>        DPRINTK(3, "PCI DVMA mapping: virt_addr = 0x%p, size = %d, direction = %d,  --> dma_addr = 0x%08x\n",
>>>    	    virt_addr, size, direction, dma_addr);
>>>
>> []
>>
>> I am going to try to make similar changes in some other part of the kernel and
>> I was wondering if you could explain how you decided it wasn't necessary to
>> check for "((struct pci_dev *) fore200e->bus_dev" nullity for instance.
> This gets set up in fore200e_pca_detect() which is pretty early in the
> initialization process.  We don't get as far as using any of the "DVMA"
> stubs unless pci_enable_device() succeeds, meaning pci_dev is good, and
> fore200e->bus_dev is assigned to pci_dev (around line 2724).
>
> fore200e->bus_dev is never cleared back to NULL, but obviously you
> shouldn't be using any of the DMA routines after disabling the pci
> device.  Hopefully the driver shuts down in an orderly fashion such
> that all DMA is over by the time the driver disables the pci device.
Thank you

^ permalink raw reply

* Re: [net-next PATCH v3 1/1] atm: remove deprecated use of pci api
From: chas williams - CONTRACTOR @ 2015-01-16 14:52 UTC (permalink / raw)
  To: Quentin Lambert; +Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <54B91BD1.70909@gmail.com>

On Fri, 16 Jan 2015 15:10:25 +0100
Quentin Lambert <lambert.quentin@gmail.com> wrote:


> > -    u32 dma_addr = pci_map_single((struct pci_dev*)fore200e->bus_dev, virt_addr, size, direction);
> > +    u32 dma_addr = dma_map_single(&((struct pci_dev *) fore200e->bus_dev)->dev, virt_addr, size, direction);
> >   
> >       DPRINTK(3, "PCI DVMA mapping: virt_addr = 0x%p, size = %d, direction = %d,  --> dma_addr = 0x%08x\n",
> >   	    virt_addr, size, direction, dma_addr);
> >
> []
> 
> I am going to try to make similar changes in some other part of the kernel and
> I was wondering if you could explain how you decided it wasn't necessary to
> check for "((struct pci_dev *) fore200e->bus_dev" nullity for instance.

This gets set up in fore200e_pca_detect() which is pretty early in the
initialization process.  We don't get as far as using any of the "DVMA"
stubs unless pci_enable_device() succeeds, meaning pci_dev is good, and
fore200e->bus_dev is assigned to pci_dev (around line 2724).

fore200e->bus_dev is never cleared back to NULL, but obviously you
shouldn't be using any of the DMA routines after disabling the pci
device.  Hopefully the driver shuts down in an orderly fashion such
that all DMA is over by the time the driver disables the pci device.

^ permalink raw reply

* Re: [PATCH] i40e: don't enable and init FCOE by default when do PF reset
From: Jeff Kirsher @ 2015-01-16 14:47 UTC (permalink / raw)
  To: ethan zhao
  Cc: Dev, Vasu, Ethan Zhao, Ronciak, John, Brandeburg, Jesse,
	Allan, Bruce W, Wyborny, Carolyn, Skidmore, Donald C,
	Rose, Gregory V, Vick, Matthew, Williams, Mitch A, Parikh, Neerav,
	Linux NICS, e1000-devel@lists.sourceforge.net,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	brian.maly@oracle.com
In-Reply-To: <54B86DF8.3040206@oracle.com>

[-- Attachment #1: Type: text/plain, Size: 9970 bytes --]

On Fri, 2015-01-16 at 09:48 +0800, ethan zhao wrote:
> Vasu,
> 
>     OK, disable FCOE as default configuration as a temporary step to 
> make it  work.

Sounds like I should expect a v2 coming, correct?

> 
> 
> Thanks,
> Ethan
> 
> On 2015/1/16 7:45, Dev, Vasu wrote:
> >> -----Original Message-----
> >> From: ethan zhao [mailto:ethan.zhao@oracle.com]
> >> Sent: Tuesday, January 13, 2015 6:41 PM
> >> To: Dev, Vasu
> >> Cc: Ethan Zhao; Ronciak, John; Kirsher, Jeffrey T; Brandeburg, Jesse; Allan,
> >> Bruce W; Wyborny, Carolyn; Skidmore, Donald C; Rose, Gregory V; Vick,
> >> Matthew; Williams, Mitch A; Parikh, Neerav; Linux NICS; e1000-
> >> devel@lists.sourceforge.net; netdev@vger.kernel.org; linux-
> >> kernel@vger.kernel.org; brian.maly@oracle.com
> >> Subject: Re: [PATCH] i40e: don't enable and init FCOE by default when do PF
> >> reset
> >>
> >> Vasu,
> >>
> >> On 2015/1/14 3:38, Dev, Vasu wrote:
> >>>> -----Original Message-----
> >>>>>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c
> >>>>>>> b/drivers/net/ethernet/intel/i40e/i40e_main.c
> >>>>>>> index a5f2660..a2572cc 100644
> >>>>>>> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> >>>>>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> >>>>>>> @@ -6180,9 +6180,12 @@ static void i40e_reset_and_rebuild(struct
> >>>>>>> i40e_pf *pf, bool reinit)
> >>>>>>>       }
> >>>>>>>    #endif /* CONFIG_I40E_DCB */
> >>>>>>>    #ifdef I40E_FCOE
> >>>>>>> -   ret = i40e_init_pf_fcoe(pf);
> >>>>>>> -   if (ret)
> >>>>>>> -           dev_info(&pf->pdev->dev, "init_pf_fcoe failed: %d\n", ret);
> >>>>>>> +   if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
> >>>>>>> +           ret = i40e_init_pf_fcoe(pf);
> >>>>> Calling i40e_init_pf_fcoe() here conflicts with its
> >>>> I40E_FLAG_FCOE_ENABLED pre-condition since
> >> I40E_FLAG_FCOE_ENABLED is
> >>>> set by very same i40e_init_pf_fcoe(), in turn i40e_init_pf_fcoe()
> >>>> will never get called.
> >>>>
> >>>> I don't think so,  here ,i40e_reset_and_rebuild()  is not the only
> >>>> and the first place that  i40e_init_pf_fcoe() is called, see
> >>>> i40e_probe(), that is the first chance.
> >>>>
> >>>> i40e_probe()
> >>>> -->i40e_sw_init()
> >>>>        -->i40e_init_pf_fcoe()
> >>>>
> >>>> And the I40E_FLAG_FCOE_ENABLED is possible be set by
> >>>> i40e_fcoe_enable() or i40e_fcoe_disable() interface before the reset
> >>>> action is to be done.
> >>>>
> >>> It is set by i40e_init_pf_fcoe() and you are right that the modified call flow
> >> by your patch won't impact setting of I40E_FLAG_FCOE_ENABLED anyway
> >> which could have prevented calling i40e_init_pf_fcoe() as I described above,
> >> so this is not an issue with the patch.
> >>>> BTW, the reason I post this patch is that we hit a bug, after setup
> >>>> vlan, the PF is enabled to FCOE.
> >>>>
> >>> Then that BUG would still remain un-fixed and calling i40e_init_pf_fcoe()
> >> under I40E_FLAG_FCOE_ENABLED  flag really won't affect call flow to fix
> >> anything. I mean I40E_FLAG_FCOE_ENABLED  condition will be true with "pf-
> >>> hw.func_caps.fcoe == true" and otherwise calling i40e_init_pf_fcoe() simply
> >> returns back early on after checking "pf->hw.func_caps.fcoe == false", so
> >> how that bug is fixed here by added I40E_FLAG_FCOE_ENABLED  condition ?
> >> What is the bug ?
> >>    The func_caps.fcoe is assigned by following call path, under our test
> >> environment,
> >>
> >>    i40e_probe()
> >>     ->i40e_get_capabilities()
> >>        ->i40e_aq_discover_capabilities()
> >>           ->i40e_parse_discover_capabilities()
> >>
> >>    Or
> >>
> >>    i40e_reset_and_rebuild()
> >>     ->i40e_get_capabilities()
> >>       ->i40e_aq_discover_capabilities()
> >>         ->i40e_parse_discover_capabilities()
> >>
> >>    Under our test environment, the "pf->hw.func_caps.fcoe" is true. so if
> >> i40e_reset_and_rebuild() is called for VLAN setup, ethtool diagnostic test.
> >>    And then i40e_init_pf_fcoe() is to be called,
> >>
> >>    While if (!pf->hw.func_caps.fcoe) wouldn't return,
> >>
> > I said it would return with "pf->hw.func_caps.fcoe == false" in my last response, more details below.
> >
> >>    So  pf->flags is set to I40E_FLAG_FCOE_ENABLED.
> >>
> >>    With my patch,  i40e_init_pf_fcoe() is only called after
> >> I40E_FLAG_FCOE_ENABLED is set before reset.
> >>
> >> Enable FCOE in i40e_probe() or not is another issue.
> >>
> > Nope since both cases we should do i40e_init_pf_fcoe() or don't based on fcoe cap true or false.
> >
> > I don't have much to add as I described before with your patch that "calling i40e_init_pf_fcoe() under I40E_FLAG_FCOE_ENABLED  flag really won't affect call flow to fix anything. I mean I40E_FLAG_FCOE_ENABLED  condition will be true with "pf->hw.func_caps.fcoe == true" and otherwise calling i40e_init_pf_fcoe() simply returns back early on after checking "pf->hw.func_caps.fcoe == false".
> >
> > Maybe I'm missing something. I guess next we either go with CONFIG_I40E_FCOE disabled, as I suggested before and which is now in the upstream kernel, or we can have further off-list discussion to fix the issue you are trying to fix with the patch.
> >
> > Thanks,
> > Vasu
> >
> >> Thanks,
> >> Ethan
> >>
> >>
> >>>>> Jeff Kirsher should be getting out a patch queued by me which adds
> >>>> I40E_FCoE Kbuild option, in that FCoE is disabled by default and
> >>>> user could enable FCoE only if needed, that patch would do same of
> >>>> skipping
> >>>> i40e_init_pf_fcoe() whether FCoE capability in device enabled or not
> >>>> in default config.
> >>>> The following patch will not fix the above issue -- configuration of
> >>>> PF will be changed via reset.
> >>>> How about the FCOE is configured and disabled by  i40e_fcoe_disable()
> >>>> , then reset happens ?
> >>>>
> >>> May be but if the BUG is due to FCoE being enabled then having it disabled
> >> in config will avoid the bug for non FCoE config option and once bug is
> >> understood then that has to be fixed for FCoE enabled config also as I asked
> >> above.
> >>> Thanks Ethan for detailed response.
> >>> Vasu
> >>>
> >>>>>   From patchwork Wed Oct  2 23:26:08 2013
> >>>>> Content-Type: text/plain; charset="utf-8"
> >>>>> MIME-Version: 1.0
> >>>>> Content-Transfer-Encoding: 7bit
> >>>>> Subject: [net] i40e: adds FCoE configure option
> >>>>> Date: Thu, 03 Oct 2013 07:26:08 -0000
> >>>>> From: Vasu Dev <vasu.dev@intel.com>
> >>>>> X-Patchwork-Id: 11797
> >>>>>
> >>>>> Adds FCoE config option I40E_FCOE, so that FCoE can be enabled as
> >>>>> needed but otherwise have it disabled by default.
> >>>>>
> >>>>> This also eliminates multiple FCoE config checks, instead now just
> >>>>> one config check for CONFIG_I40E_FCOE.
> >>>>>
> >>>>> The I40E FCoE was added with 3.17 kernel and therefore this patch
> >>>>> shall be applied to stable 3.17 kernel also.
> >>>>>
> >>>>> CC: <stable@vger.kernel.org>
> >>>>> Signed-off-by: Vasu Dev <vasu.dev@intel.com>
> >>>>> Tested-by: Jim Young <jamesx.m.young@intel.com>
> >>>>>
> >>>>> ---
> >>>>> drivers/net/ethernet/intel/Kconfig           |   11 +++++++++++
> >>>>>    drivers/net/ethernet/intel/i40e/Makefile     |    2 +-
> >>>>>    drivers/net/ethernet/intel/i40e/i40e_osdep.h |    4 ++--
> >>>>>    3 files changed, 14 insertions(+), 3 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/net/ethernet/intel/Kconfig
> >>>>> b/drivers/net/ethernet/intel/Kconfig
> >>>>> index 5b8300a..4d61ef5 100644
> >>>>> --- a/drivers/net/ethernet/intel/Kconfig
> >>>>> +++ b/drivers/net/ethernet/intel/Kconfig
> >>>>> @@ -281,6 +281,17 @@ config I40E_DCB
> >>>>>
> >>>>>             If unsure, say N.
> >>>>>
> >>>>> +config I40E_FCOE
> >>>>> +       bool "Fibre Channel over Ethernet (FCoE)"
> >>>>> +       default n
> >>>>> +       depends on I40E && DCB && FCOE
> >>>>> +       ---help---
> >>>>> +         Say Y here if you want to use Fibre Channel over Ethernet (FCoE)
> >>>>> +         in the driver. This will create new netdev for exclusive FCoE
> >>>>> +         use with XL710 FCoE offloads enabled.
> >>>>> +
> >>>>> +         If unsure, say N.
> >>>>> +
> >>>>>    config I40EVF
> >>>>>           tristate "Intel(R) XL710 X710 Virtual Function Ethernet support"
> >>>>>           depends on PCI_MSI
> >>>>> diff --git a/drivers/net/ethernet/intel/i40e/Makefile
> >>>>> b/drivers/net/ethernet/intel/i40e/Makefile
> >>>>> index 4b94ddb..c405819 100644
> >>>>> --- a/drivers/net/ethernet/intel/i40e/Makefile
> >>>>> +++ b/drivers/net/ethernet/intel/i40e/Makefile
> >>>>> @@ -44,4 +44,4 @@ i40e-objs := i40e_main.o \
> >>>>>           i40e_virtchnl_pf.o
> >>>>>
> >>>>>    i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
> >>>>> -i40e-$(CONFIG_FCOE:m=y) += i40e_fcoe.o
> >>>>> +i40e-$(CONFIG_I40E_FCOE) += i40e_fcoe.o
> >>>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
> >>>>> b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
> >>>>> index 045b5c4..ad802dd 100644
> >>>>> --- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
> >>>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
> >>>>> @@ -78,7 +78,7 @@ do {                                                            \
> >>>>>    } while (0)
> >>>>>
> >>>>>    typedef enum i40e_status_code i40e_status; -#if
> >>>>> defined(CONFIG_FCOE)
> >>>>> || defined(CONFIG_FCOE_MODULE)
> >>>>> +#ifdef CONFIG_I40E_FCOE
> >>>>>    #define I40E_FCOE
> >>>>> -#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
> >>>>> +#endif
> >>>>>    #endif /* _I40E_OSDEP_H_ */
> >>>>>
> >>>>>>> +           if (ret)
> >>>>>>> +                   dev_info(&pf->pdev->dev,
> >>>>>>> +                            "init_pf_fcoe failed: %d\n", ret);
> >>>>>>> +   }
> >>>>>>>
> >>>>>>>    #endif
> >>>>>>>       /* do basic switch setup */
> >>>>>>> --
> >>>>>>> 1.8.3.1
> >>>> Thanks,
> >>>> Ethan
> 



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCHv2] ixgbe: Re-enable relaxed ordering as part of init/restart sequence for non-DCA config
From: Jeff Kirsher @ 2015-01-16 14:45 UTC (permalink / raw)
  To: Sowmini Varadhan
  Cc: jesse.brandeburg, bruce.w.allan, carolyn.wyborny,
	donald.c.skidmore, gregory.v.rose, matthew.vick, john.ronciak,
	mitch.a.williams, linux.nics, e1000-devel, netdev, linux-kernel,
	sparclinux, emil.s.tantilov
In-Reply-To: <20150115010352.GK24238@oracle.com>

[-- Attachment #1: Type: text/plain, Size: 1393 bytes --]

On Wed, 2015-01-14 at 20:03 -0500, Sowmini Varadhan wrote:
> Relaxed ordering is disabled by default at driver initialization
> and re-enabled when DCA is used. The reason it is disabled  was
> due to an issue on some chipsets (see comments in
> ixgbe_update_tx_dca()).
> But when DCA is not used, RO needs to be re-enabled, else we have
> a serialization bottleneck on platforms like SPARC.
> 
> This patch eliminates the bottleneck for ixgbe when DCA is not
> configured.
> 
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> Cc: Emil Tantilov <emil.s.tantilov@intel.com>
> 
> ---
> v2: incorporate comments from Emil Tantilov
> 
>  drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  |   23
> +++++++++++++++++++++++
>  drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |   20
> ++++++++++++++++++++
>  drivers/net/ethernet/intel/ixgbe/ixgbe_common.h |    1 +
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   |   11 +++++++++++
>  drivers/net/ethernet/intel/ixgbe/ixgbe_type.h   |    1 +
>  drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c   |    1 +
>  6 files changed, 57 insertions(+), 0 deletions(-)

Thanks Sowmini, I have added your patch to my queue.

I know that Emil and others are looking into whether we can do this for
all ixgbe silicon, so we may have a follow-on patch to the work you have
done already to enable this for all devices.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH v4 2/4] can: kvaser_usb: Update error counters before exiting on OOM
From: Marc Kleine-Budde @ 2015-01-16 14:39 UTC (permalink / raw)
  To: Ahmed S. Darwish
  Cc: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
	David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML,
	andri.yngvason
In-Reply-To: <20150112203650.GA11355@linux>

[-- Attachment #1: Type: text/plain, Size: 1626 bytes --]

On 01/12/2015 09:36 PM, Ahmed S. Darwish wrote:
> On Mon, Jan 12, 2015 at 12:09:32PM +0100, Marc Kleine-Budde wrote:
>> On 01/11/2015 09:15 PM, Ahmed S. Darwish wrote:
>>> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
>>>
>>> Let the error counters be more accurate in case of Out of
>>> Memory conditions.
>>
>> Please have a look at kvaser_usb_rx_error(), the whole state handling is
>> omitted in case of OOM.
>>
> 
> I see. Regarding kvaser_usb_rx_error(), would something like
> below patch be acceptable? 
> 
> Kindly note that separating recording interface state from
> error frame packet building leads to duplication of a good
> number of if-conditions. Meanwhile, it truly saves _all_
> of the possible state before any ENOMEM -- the correct thing
> to do.
> 
> Another solution was to allocate the can frame on the stack,
> and thus avoiding any code duplication. But this only leads
> to calls of "kvaser_usb_simple_msg_async", which can fail
> with -ENOMEM by itself, returning to the very same problem
> again. 
> 
> If the patch is acceptable, I'll rebase my USBCAN-II driver
> above it and re-submit the series (minus the merged patch).

Looks good from my point of view, stats and state are handled
independent of the error skb.

Andri can you have a look at the state handling itself?

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH net] net: sctp: fix race for one-to-many sockets in sendmsg's auto associate
From: Vlad Yasevich @ 2015-01-16 14:31 UTC (permalink / raw)
  To: Daniel Borkmann, davem; +Cc: netdev, linux-sctp
In-Reply-To: <1421336075-25061-1-git-send-email-dborkman@redhat.com>

On 01/15/2015 10:34 AM, Daniel Borkmann wrote:
> I.e. one-to-many sockets in SCTP are not required to explicitly
> call into connect(2) or sctp_connectx(2) prior to data exchange.
> Instead, they can directly invoke sendmsg(2) and the SCTP stack
> will automatically trigger connection establishment through 4WHS
> via sctp_primitive_ASSOCIATE(). However, this in its current
> implementation is racy: INIT is being sent out immediately (as
> it cannot be bundled anyway) and the rest of the DATA chunks are
> queued up for later xmit when connection is established, meaning
> sendmsg(2) will return successfully. This behaviour can result
> in an undesired side-effect that the kernel made the application
> think the data has already been transmitted, although none of it
> has actually left the machine, worst case even after close(2)'ing
> the socket.
> 
> Instead, when the association from client side has been shut down
> e.g. first gracefully through SCTP_EOF and then close(2), the
> client could afterwards still receive the server's INIT_ACK due
> to a connection with higher latency. This INIT_ACK is then considered
> out of the blue and hence responded with ABORT as there was no
> alive assoc found anymore. This can be easily reproduced f.e.
> with sctp_test application from lksctp. One way to fix this race
> is to wait for the handshake to actually complete.
> 
> The fix defers waiting after sctp_primitive_ASSOCIATE() and
> sctp_primitive_SEND() succeeded, so that DATA chunks cooked up
> from sctp_sendmsg() have already been placed into the output
> queue through the side-effect interpreter, and therefore can then
> be bundled together with COOKIE_ECHO control chunks.
> 
> strace from example application (shortened):
> 
> socket(PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP) = 3
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(0)=[], msg_controllen=48, {cmsg_len=48, cmsg_level=0x84 /* SOL_??? */, cmsg_type=, ...},
>            msg_flags=0}, 0) = 0 // graceful shutdown for SOCK_SEQPACKET via SCTP_EOF
> close(3) = 0
> 
> tcpdump before patch (fooling the application):
> 
> 22:33:36.306142 IP 192.168.1.114.41462 > 192.168.1.115.8888: sctp (1) [INIT] [init tag: 3879023686] [rwnd: 106496] [OS: 10] [MIS: 65535] [init TSN: 3139201684]
> 22:33:36.316619 IP 192.168.1.115.8888 > 192.168.1.114.41462: sctp (1) [INIT ACK] [init tag: 3345394793] [rwnd: 106496] [OS: 10] [MIS: 10] [init TSN: 3380109591]
> 22:33:36.317600 IP 192.168.1.114.41462 > 192.168.1.115.8888: sctp (1) [ABORT]
> 
> tcpdump after patch:
> 
> 14:28:58.884116 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [INIT] [init tag: 438593213] [rwnd: 106496] [OS: 10] [MIS: 65535] [init TSN: 3092969729]
> 14:28:58.888414 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [INIT ACK] [init tag: 381429855] [rwnd: 106496] [OS: 10] [MIS: 10] [init TSN: 2141904492]
> 14:28:58.888638 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [COOKIE ECHO] , (2) [DATA] (B)(E) [TSN: 3092969729] [...]
> 14:28:58.893278 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [COOKIE ACK] , (2) [SACK] [cum ack 3092969729] [a_rwnd 106491] [#gap acks 0] [#dup tsns 0]
> 14:28:58.893591 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [DATA] (B)(E) [TSN: 3092969730] [...]
> 14:28:59.096963 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [SACK] [cum ack 3092969730] [a_rwnd 106496] [#gap acks 0] [#dup tsns 0]
> 14:28:59.097086 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [DATA] (B)(E) [TSN: 3092969731] [...] , (2) [DATA] (B)(E) [TSN: 3092969732] [...]
> 14:28:59.103218 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [SACK] [cum ack 3092969732] [a_rwnd 106486] [#gap acks 0] [#dup tsns 0]
> 14:28:59.103330 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [SHUTDOWN]
> 14:28:59.107793 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [SHUTDOWN ACK]
> 14:28:59.107890 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [SHUTDOWN COMPLETE]
> 
> Looks like this bug is from the pre-git history museum. ;)
> 
> Fixes: 08707d5482df ("lksctp-2_5_31-0_5_1.patch")
> Signed-off-by: Daniel Borkmann <dborkman@redhat.com>

Acked-by: Vlad Yasevich <vyasevich@gmail.com>

We also need to remember that the same scenario can be reproduced without an
implicit connect by simply using non-blocking socket on a high rtt link.  I've made this
comment privately to Daniel, but want to mention this on the list.  Daniel and I will be
working on additional patches to address that issue.

-vlad

> ---
>  net/sctp/socket.c | 8 +++++++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 2625ecc..aafe94b 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -1603,7 +1603,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
>  	sctp_assoc_t associd = 0;
>  	sctp_cmsgs_t cmsgs = { NULL };
>  	sctp_scope_t scope;
> -	bool fill_sinfo_ttl = false;
> +	bool fill_sinfo_ttl = false, wait_connect = false;
>  	struct sctp_datamsg *datamsg;
>  	int msg_flags = msg->msg_flags;
>  	__u16 sinfo_flags = 0;
> @@ -1943,6 +1943,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
>  		if (err < 0)
>  			goto out_free;
>  
> +		wait_connect = true;
>  		pr_debug("%s: we associated primitively\n", __func__);
>  	}
>  
> @@ -1980,6 +1981,11 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
>  	sctp_datamsg_put(datamsg);
>  	err = msg_len;
>  
> +	if (unlikely(wait_connect)) {
> +		timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT);
> +		sctp_wait_for_connect(asoc, &timeo);
> +	}
> +
>  	/* If we are already past ASSOCIATE, the lower
>  	 * layers are responsible for association cleanup.
>  	 */
> 

^ permalink raw reply

* Re: [net-next PATCH v3 1/1] atm: remove deprecated use of pci api
From: Quentin Lambert @ 2015-01-16 14:10 UTC (permalink / raw)
  To: chas williams - CONTRACTOR, David Laight
  Cc: 'David Miller', linux-atm-general@lists.sourceforge.net,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20150116085721.56aa8075@thirdoffive.cmf.nrl.navy.mil>


On 16/01/2015 14:57, chas williams - CONTRACTOR wrote:
>
> Signed-off-by: Chas Williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
> ---
>   drivers/atm/eni.c       |  33 +++++++------
>   drivers/atm/fore200e.c  |  22 +++++----
>   drivers/atm/he.c        | 125 +++++++++++++++++++++++++-----------------------
>   drivers/atm/he.h        |   4 +-
>   drivers/atm/idt77252.c  | 107 ++++++++++++++++++++++-------------------
>   drivers/atm/iphase.c    |  54 +++++++++++----------
>   drivers/atm/lanai.c     |  14 ++----
>   drivers/atm/nicstar.c   |  60 +++++++++++------------
>   drivers/atm/solos-pci.c |  26 +++++-----
>   drivers/atm/zatm.c      |  17 ++++---
>   10 files changed, 243 insertions(+), 219 deletions(-)
>
[]
> diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
> index d5d9eaf..75dde90 100644
> --- a/drivers/atm/fore200e.c
> +++ b/drivers/atm/fore200e.c
> @@ -425,7 +425,7 @@ static void fore200e_pca_write(u32 val, volatile u32 __iomem *addr)
>   static u32
>   fore200e_pca_dma_map(struct fore200e* fore200e, void* virt_addr, int size, int direction)
>   {
> -    u32 dma_addr = pci_map_single((struct pci_dev*)fore200e->bus_dev, virt_addr, size, direction);
> +    u32 dma_addr = dma_map_single(&((struct pci_dev *) fore200e->bus_dev)->dev, virt_addr, size, direction);
>   
>       DPRINTK(3, "PCI DVMA mapping: virt_addr = 0x%p, size = %d, direction = %d,  --> dma_addr = 0x%08x\n",
>   	    virt_addr, size, direction, dma_addr);
>
[]

I am going to try to make similar changes in some other part of the kernel and
I was wondering if you could explain how you decided it wasn't necessary to
check for "((struct pci_dev *) fore200e->bus_dev" nullity for instance.

Thanks,

Quentin

^ permalink raw reply

* [net-next PATCH v3 1/1] atm: remove deprecated use of pci api
From: chas williams - CONTRACTOR @ 2015-01-16 13:57 UTC (permalink / raw)
  To: David Laight
  Cc: 'David Miller', lambert.quentin@gmail.com,
	linux-atm-general@lists.sourceforge.net, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D1CAC79B3@AcuExch.aculab.com>



Signed-off-by: Chas Williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
---
 drivers/atm/eni.c       |  33 +++++++------
 drivers/atm/fore200e.c  |  22 +++++----
 drivers/atm/he.c        | 125 +++++++++++++++++++++++++-----------------------
 drivers/atm/he.h        |   4 +-
 drivers/atm/idt77252.c  | 107 ++++++++++++++++++++++-------------------
 drivers/atm/iphase.c    |  54 +++++++++++----------
 drivers/atm/lanai.c     |  14 ++----
 drivers/atm/nicstar.c   |  60 +++++++++++------------
 drivers/atm/solos-pci.c |  26 +++++-----
 drivers/atm/zatm.c      |  17 ++++---
 10 files changed, 243 insertions(+), 219 deletions(-)

diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index c7fab3e..6339efd 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -354,9 +354,9 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb,
 	eni_vcc = ENI_VCC(vcc);
 	paddr = 0; /* GCC, shut up */
 	if (skb) {
-		paddr = pci_map_single(eni_dev->pci_dev,skb->data,skb->len,
-		    PCI_DMA_FROMDEVICE);
-		if (pci_dma_mapping_error(eni_dev->pci_dev, paddr))
+		paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len,
+				       DMA_FROM_DEVICE);
+		if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr))
 			goto dma_map_error;
 		ENI_PRV_PADDR(skb) = paddr;
 		if (paddr & 3)
@@ -481,8 +481,8 @@ rx_enqueued++;
 
 trouble:
 	if (paddr)
-		pci_unmap_single(eni_dev->pci_dev,paddr,skb->len,
-		    PCI_DMA_FROMDEVICE);
+		dma_unmap_single(&eni_dev->pci_dev->dev,paddr,skb->len,
+				 DMA_FROM_DEVICE);
 dma_map_error:
 	if (skb) dev_kfree_skb_irq(skb);
 	return -1;
@@ -758,8 +758,8 @@ rx_dequeued++;
 		}
 		eni_vcc->rxing--;
 		eni_vcc->rx_pos = ENI_PRV_POS(skb) & (eni_vcc->words-1);
-		pci_unmap_single(eni_dev->pci_dev,ENI_PRV_PADDR(skb),skb->len,
-		    PCI_DMA_TODEVICE);
+		dma_unmap_single(&eni_dev->pci_dev->dev,ENI_PRV_PADDR(skb),skb->len,
+			         DMA_TO_DEVICE);
 		if (!skb->len) dev_kfree_skb_irq(skb);
 		else {
 			EVENT("pushing (len=%ld)\n",skb->len,0);
@@ -1112,8 +1112,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags);
 		    vcc->dev->number);
 		return enq_jam;
 	}
-	paddr = pci_map_single(eni_dev->pci_dev,skb->data,skb->len,
-	    PCI_DMA_TODEVICE);
+	paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len,
+			       DMA_TO_DEVICE);
 	ENI_PRV_PADDR(skb) = paddr;
 	/* prepare DMA queue entries */
 	j = 0;
@@ -1226,8 +1226,8 @@ static void dequeue_tx(struct atm_dev *dev)
 			break;
 		}
 		ENI_VCC(vcc)->txing -= ENI_PRV_SIZE(skb);
-		pci_unmap_single(eni_dev->pci_dev,ENI_PRV_PADDR(skb),skb->len,
-		    PCI_DMA_TODEVICE);
+		dma_unmap_single(&eni_dev->pci_dev->dev,ENI_PRV_PADDR(skb),skb->len,
+				 DMA_TO_DEVICE);
 		if (vcc->pop) vcc->pop(vcc,skb);
 		else dev_kfree_skb_irq(skb);
 		atomic_inc(&vcc->stats->tx);
@@ -2240,13 +2240,18 @@ static int eni_init_one(struct pci_dev *pci_dev,
 	if (rc < 0)
 		goto out;
 
+	rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
+	if (rc < 0)
+		goto out;
+
 	rc = -ENOMEM;
 	eni_dev = kmalloc(sizeof(struct eni_dev), GFP_KERNEL);
 	if (!eni_dev)
 		goto err_disable;
 
 	zero = &eni_dev->zero;
-	zero->addr = pci_alloc_consistent(pci_dev, ENI_ZEROES_SIZE, &zero->dma);
+	zero->addr = dma_alloc_coherent(&pci_dev->dev,
+					ENI_ZEROES_SIZE, &zero->dma, GFP_KERNEL);
 	if (!zero->addr)
 		goto err_kfree;
 
@@ -2277,7 +2282,7 @@ err_eni_release:
 err_unregister:
 	atm_dev_deregister(dev);
 err_free_consistent:
-	pci_free_consistent(pci_dev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
+	dma_free_coherent(&pci_dev->dev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
 err_kfree:
 	kfree(eni_dev);
 err_disable:
@@ -2302,7 +2307,7 @@ static void eni_remove_one(struct pci_dev *pdev)
 
 	eni_do_release(dev);
 	atm_dev_deregister(dev);
-	pci_free_consistent(pdev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
+	dma_free_coherent(&pdev->dev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
 	kfree(ed);
 	pci_disable_device(pdev);
 }
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index d5d9eaf..75dde90 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -425,7 +425,7 @@ static void fore200e_pca_write(u32 val, volatile u32 __iomem *addr)
 static u32
 fore200e_pca_dma_map(struct fore200e* fore200e, void* virt_addr, int size, int direction)
 {
-    u32 dma_addr = pci_map_single((struct pci_dev*)fore200e->bus_dev, virt_addr, size, direction);
+    u32 dma_addr = dma_map_single(&((struct pci_dev *) fore200e->bus_dev)->dev, virt_addr, size, direction);
 
     DPRINTK(3, "PCI DVMA mapping: virt_addr = 0x%p, size = %d, direction = %d,  --> dma_addr = 0x%08x\n",
 	    virt_addr, size, direction, dma_addr);
@@ -440,7 +440,7 @@ fore200e_pca_dma_unmap(struct fore200e* fore200e, u32 dma_addr, int size, int di
     DPRINTK(3, "PCI DVMA unmapping: dma_addr = 0x%08x, size = %d, direction = %d\n",
 	    dma_addr, size, direction);
 
-    pci_unmap_single((struct pci_dev*)fore200e->bus_dev, dma_addr, size, direction);
+    dma_unmap_single(&((struct pci_dev *) fore200e->bus_dev)->dev, dma_addr, size, direction);
 }
 
 
@@ -449,7 +449,7 @@ fore200e_pca_dma_sync_for_cpu(struct fore200e* fore200e, u32 dma_addr, int size,
 {
     DPRINTK(3, "PCI DVMA sync: dma_addr = 0x%08x, size = %d, direction = %d\n", dma_addr, size, direction);
 
-    pci_dma_sync_single_for_cpu((struct pci_dev*)fore200e->bus_dev, dma_addr, size, direction);
+    dma_sync_single_for_cpu(&((struct pci_dev *) fore200e->bus_dev)->dev, dma_addr, size, direction);
 }
 
 static void
@@ -457,7 +457,7 @@ fore200e_pca_dma_sync_for_device(struct fore200e* fore200e, u32 dma_addr, int si
 {
     DPRINTK(3, "PCI DVMA sync: dma_addr = 0x%08x, size = %d, direction = %d\n", dma_addr, size, direction);
 
-    pci_dma_sync_single_for_device((struct pci_dev*)fore200e->bus_dev, dma_addr, size, direction);
+    dma_sync_single_for_device(&((struct pci_dev *) fore200e->bus_dev)->dev, dma_addr, size, direction);
 }
 
 
@@ -470,9 +470,10 @@ fore200e_pca_dma_chunk_alloc(struct fore200e* fore200e, struct chunk* chunk,
 {
     /* returned chunks are page-aligned */
     chunk->alloc_size = size * nbr;
-    chunk->alloc_addr = pci_alloc_consistent((struct pci_dev*)fore200e->bus_dev,
-					     chunk->alloc_size,
-					     &chunk->dma_addr);
+    chunk->alloc_addr = dma_alloc_coherent(&((struct pci_dev *) fore200e->bus_dev)->dev,
+					   chunk->alloc_size,
+					   &chunk->dma_addr,
+					   GFP_KERNEL);
     
     if ((chunk->alloc_addr == NULL) || (chunk->dma_addr == 0))
 	return -ENOMEM;
@@ -488,7 +489,7 @@ fore200e_pca_dma_chunk_alloc(struct fore200e* fore200e, struct chunk* chunk,
 static void
 fore200e_pca_dma_chunk_free(struct fore200e* fore200e, struct chunk* chunk)
 {
-    pci_free_consistent((struct pci_dev*)fore200e->bus_dev,
+    dma_free_coherent(&((struct pci_dev *) fore200e->bus_dev)->dev,
 			chunk->alloc_size,
 			chunk->alloc_addr,
 			chunk->dma_addr);
@@ -2707,6 +2708,11 @@ static int fore200e_pca_detect(struct pci_dev *pci_dev,
 	err = -EINVAL;
 	goto out;
     }
+
+    if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32))) {
+	err = -EINVAL;
+	goto out;
+    }
     
     fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL);
     if (fore200e == NULL) {
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index c39702b..93dca2e 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -359,7 +359,7 @@ static int he_init_one(struct pci_dev *pci_dev,
 
 	if (pci_enable_device(pci_dev))
 		return -EIO;
-	if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32)) != 0) {
+	if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)) != 0) {
 		printk(KERN_WARNING "he: no suitable dma available\n");
 		err = -EIO;
 		goto init_one_failure;
@@ -533,9 +533,9 @@ static void he_init_tx_lbfp(struct he_dev *he_dev)
 
 static int he_init_tpdrq(struct he_dev *he_dev)
 {
-	he_dev->tpdrq_base = pci_zalloc_consistent(he_dev->pci_dev,
-						   CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq),
-						   &he_dev->tpdrq_phys);
+	he_dev->tpdrq_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+						 CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq),
+						 &he_dev->tpdrq_phys, GFP_KERNEL);
 	if (he_dev->tpdrq_base == NULL) {
 		hprintk("failed to alloc tpdrq\n");
 		return -ENOMEM;
@@ -796,16 +796,16 @@ static int he_init_group(struct he_dev *he_dev, int group)
 	}
 
 	/* large buffer pool */
-	he_dev->rbpl_pool = pci_pool_create("rbpl", he_dev->pci_dev,
+	he_dev->rbpl_pool = dma_pool_create("rbpl", &he_dev->pci_dev->dev,
 					    CONFIG_RBPL_BUFSIZE, 64, 0);
 	if (he_dev->rbpl_pool == NULL) {
 		hprintk("unable to create rbpl pool\n");
 		goto out_free_rbpl_virt;
 	}
 
-	he_dev->rbpl_base = pci_zalloc_consistent(he_dev->pci_dev,
-						  CONFIG_RBPL_SIZE * sizeof(struct he_rbp),
-						  &he_dev->rbpl_phys);
+	he_dev->rbpl_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+						CONFIG_RBPL_SIZE * sizeof(struct he_rbp),
+						&he_dev->rbpl_phys, GFP_KERNEL);
 	if (he_dev->rbpl_base == NULL) {
 		hprintk("failed to alloc rbpl_base\n");
 		goto out_destroy_rbpl_pool;
@@ -815,7 +815,7 @@ static int he_init_group(struct he_dev *he_dev, int group)
 
 	for (i = 0; i < CONFIG_RBPL_SIZE; ++i) {
 
-		heb = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|GFP_DMA, &mapping);
+		heb = dma_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL, &mapping);
 		if (!heb)
 			goto out_free_rbpl;
 		heb->mapping = mapping;
@@ -842,9 +842,9 @@ static int he_init_group(struct he_dev *he_dev, int group)
 
 	/* rx buffer ready queue */
 
-	he_dev->rbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
-						  CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
-						  &he_dev->rbrq_phys);
+	he_dev->rbrq_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+						CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
+						&he_dev->rbrq_phys, GFP_KERNEL);
 	if (he_dev->rbrq_base == NULL) {
 		hprintk("failed to allocate rbrq\n");
 		goto out_free_rbpl;
@@ -866,9 +866,9 @@ static int he_init_group(struct he_dev *he_dev, int group)
 
 	/* tx buffer ready queue */
 
-	he_dev->tbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
-						  CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
-						  &he_dev->tbrq_phys);
+	he_dev->tbrq_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+						CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
+						&he_dev->tbrq_phys, GFP_KERNEL);
 	if (he_dev->tbrq_base == NULL) {
 		hprintk("failed to allocate tbrq\n");
 		goto out_free_rbpq_base;
@@ -884,18 +884,18 @@ static int he_init_group(struct he_dev *he_dev, int group)
 	return 0;
 
 out_free_rbpq_base:
-	pci_free_consistent(he_dev->pci_dev, CONFIG_RBRQ_SIZE *
-			sizeof(struct he_rbrq), he_dev->rbrq_base,
-			he_dev->rbrq_phys);
+	dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBRQ_SIZE *
+			  sizeof(struct he_rbrq), he_dev->rbrq_base,
+			  he_dev->rbrq_phys);
 out_free_rbpl:
 	list_for_each_entry_safe(heb, next, &he_dev->rbpl_outstanding, entry)
-		pci_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
+		dma_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
 
-	pci_free_consistent(he_dev->pci_dev, CONFIG_RBPL_SIZE *
-			sizeof(struct he_rbp), he_dev->rbpl_base,
-			he_dev->rbpl_phys);
+	dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBPL_SIZE *
+			  sizeof(struct he_rbp), he_dev->rbpl_base,
+			  he_dev->rbpl_phys);
 out_destroy_rbpl_pool:
-	pci_pool_destroy(he_dev->rbpl_pool);
+	dma_pool_destroy(he_dev->rbpl_pool);
 out_free_rbpl_virt:
 	kfree(he_dev->rbpl_virt);
 out_free_rbpl_table:
@@ -911,8 +911,11 @@ static int he_init_irq(struct he_dev *he_dev)
 	/* 2.9.3.5  tail offset for each interrupt queue is located after the
 		    end of the interrupt queue */
 
-	he_dev->irq_base = pci_alloc_consistent(he_dev->pci_dev,
-			(CONFIG_IRQ_SIZE+1) * sizeof(struct he_irq), &he_dev->irq_phys);
+	he_dev->irq_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+					       (CONFIG_IRQ_SIZE + 1)
+					       * sizeof(struct he_irq),
+					       &he_dev->irq_phys,
+					       GFP_KERNEL);
 	if (he_dev->irq_base == NULL) {
 		hprintk("failed to allocate irq\n");
 		return -ENOMEM;
@@ -1419,10 +1422,10 @@ static int he_start(struct atm_dev *dev)
 
 	he_init_tpdrq(he_dev);
 
-	he_dev->tpd_pool = pci_pool_create("tpd", he_dev->pci_dev,
-		sizeof(struct he_tpd), TPD_ALIGNMENT, 0);
+	he_dev->tpd_pool = dma_pool_create("tpd", &he_dev->pci_dev->dev,
+					   sizeof(struct he_tpd), TPD_ALIGNMENT, 0);
 	if (he_dev->tpd_pool == NULL) {
-		hprintk("unable to create tpd pci_pool\n");
+		hprintk("unable to create tpd dma_pool\n");
 		return -ENOMEM;         
 	}
 
@@ -1459,9 +1462,9 @@ static int he_start(struct atm_dev *dev)
 
 	/* host status page */
 
-	he_dev->hsp = pci_zalloc_consistent(he_dev->pci_dev,
-					    sizeof(struct he_hsp),
-					    &he_dev->hsp_phys);
+	he_dev->hsp = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+					  sizeof(struct he_hsp),
+					  &he_dev->hsp_phys, GFP_KERNEL);
 	if (he_dev->hsp == NULL) {
 		hprintk("failed to allocate host status page\n");
 		return -ENOMEM;
@@ -1558,41 +1561,41 @@ he_stop(struct he_dev *he_dev)
 		free_irq(he_dev->irq, he_dev);
 
 	if (he_dev->irq_base)
-		pci_free_consistent(he_dev->pci_dev, (CONFIG_IRQ_SIZE+1)
-			* sizeof(struct he_irq), he_dev->irq_base, he_dev->irq_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, (CONFIG_IRQ_SIZE + 1)
+				  * sizeof(struct he_irq), he_dev->irq_base, he_dev->irq_phys);
 
 	if (he_dev->hsp)
-		pci_free_consistent(he_dev->pci_dev, sizeof(struct he_hsp),
-						he_dev->hsp, he_dev->hsp_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, sizeof(struct he_hsp),
+				  he_dev->hsp, he_dev->hsp_phys);
 
 	if (he_dev->rbpl_base) {
 		list_for_each_entry_safe(heb, next, &he_dev->rbpl_outstanding, entry)
-			pci_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
+			dma_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
 
-		pci_free_consistent(he_dev->pci_dev, CONFIG_RBPL_SIZE
-			* sizeof(struct he_rbp), he_dev->rbpl_base, he_dev->rbpl_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBPL_SIZE
+				  * sizeof(struct he_rbp), he_dev->rbpl_base, he_dev->rbpl_phys);
 	}
 
 	kfree(he_dev->rbpl_virt);
 	kfree(he_dev->rbpl_table);
 
 	if (he_dev->rbpl_pool)
-		pci_pool_destroy(he_dev->rbpl_pool);
+		dma_pool_destroy(he_dev->rbpl_pool);
 
 	if (he_dev->rbrq_base)
-		pci_free_consistent(he_dev->pci_dev, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
-							he_dev->rbrq_base, he_dev->rbrq_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
+				  he_dev->rbrq_base, he_dev->rbrq_phys);
 
 	if (he_dev->tbrq_base)
-		pci_free_consistent(he_dev->pci_dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
-							he_dev->tbrq_base, he_dev->tbrq_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
+				  he_dev->tbrq_base, he_dev->tbrq_phys);
 
 	if (he_dev->tpdrq_base)
-		pci_free_consistent(he_dev->pci_dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
-							he_dev->tpdrq_base, he_dev->tpdrq_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq),
+				  he_dev->tpdrq_base, he_dev->tpdrq_phys);
 
 	if (he_dev->tpd_pool)
-		pci_pool_destroy(he_dev->tpd_pool);
+		dma_pool_destroy(he_dev->tpd_pool);
 
 	if (he_dev->pci_dev) {
 		pci_read_config_word(he_dev->pci_dev, PCI_COMMAND, &command);
@@ -1610,7 +1613,7 @@ __alloc_tpd(struct he_dev *he_dev)
 	struct he_tpd *tpd;
 	dma_addr_t mapping;
 
-	tpd = pci_pool_alloc(he_dev->tpd_pool, GFP_ATOMIC|GFP_DMA, &mapping);
+	tpd = dma_pool_alloc(he_dev->tpd_pool, GFP_ATOMIC, &mapping);
 	if (tpd == NULL)
 		return NULL;
 			
@@ -1681,7 +1684,7 @@ he_service_rbrq(struct he_dev *he_dev, int group)
 			if (!RBRQ_HBUF_ERR(he_dev->rbrq_head)) {
 				clear_bit(i, he_dev->rbpl_table);
 				list_del(&heb->entry);
-				pci_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
+				dma_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
 			}
 					
 			goto next_rbrq_entry;
@@ -1774,7 +1777,7 @@ return_host_buffers:
 		++pdus_assembled;
 
 		list_for_each_entry_safe(heb, next, &he_vcc->buffers, entry)
-			pci_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
+			dma_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
 		INIT_LIST_HEAD(&he_vcc->buffers);
 		he_vcc->pdu_len = 0;
 
@@ -1843,10 +1846,10 @@ he_service_tbrq(struct he_dev *he_dev, int group)
 
 		for (slot = 0; slot < TPD_MAXIOV; ++slot) {
 			if (tpd->iovec[slot].addr)
-				pci_unmap_single(he_dev->pci_dev,
+				dma_unmap_single(&he_dev->pci_dev->dev,
 					tpd->iovec[slot].addr,
 					tpd->iovec[slot].len & TPD_LEN_MASK,
-							PCI_DMA_TODEVICE);
+							DMA_TO_DEVICE);
 			if (tpd->iovec[slot].len & TPD_LST)
 				break;
 				
@@ -1861,7 +1864,7 @@ he_service_tbrq(struct he_dev *he_dev, int group)
 
 next_tbrq_entry:
 		if (tpd)
-			pci_pool_free(he_dev->tpd_pool, tpd, TPD_ADDR(tpd->status));
+			dma_pool_free(he_dev->tpd_pool, tpd, TPD_ADDR(tpd->status));
 		he_dev->tbrq_head = (struct he_tbrq *)
 				((unsigned long) he_dev->tbrq_base |
 					TBRQ_MASK(he_dev->tbrq_head + 1));
@@ -1905,7 +1908,7 @@ he_service_rbpl(struct he_dev *he_dev, int group)
 		}
 		he_dev->rbpl_hint = i + 1;
 
-		heb = pci_pool_alloc(he_dev->rbpl_pool, GFP_ATOMIC|GFP_DMA, &mapping);
+		heb = dma_pool_alloc(he_dev->rbpl_pool, GFP_ATOMIC, &mapping);
 		if (!heb)
 			break;
 		heb->mapping = mapping;
@@ -2084,10 +2087,10 @@ __enqueue_tpd(struct he_dev *he_dev, struct he_tpd *tpd, unsigned cid)
 			 */
 			for (slot = 0; slot < TPD_MAXIOV; ++slot) {
 				if (tpd->iovec[slot].addr)
-					pci_unmap_single(he_dev->pci_dev,
+					dma_unmap_single(&he_dev->pci_dev->dev,
 						tpd->iovec[slot].addr,
 						tpd->iovec[slot].len & TPD_LEN_MASK,
-								PCI_DMA_TODEVICE);
+								DMA_TO_DEVICE);
 			}
 			if (tpd->skb) {
 				if (tpd->vcc->pop)
@@ -2096,7 +2099,7 @@ __enqueue_tpd(struct he_dev *he_dev, struct he_tpd *tpd, unsigned cid)
 					dev_kfree_skb_any(tpd->skb);
 				atomic_inc(&tpd->vcc->stats->tx_err);
 			}
-			pci_pool_free(he_dev->tpd_pool, tpd, TPD_ADDR(tpd->status));
+			dma_pool_free(he_dev->tpd_pool, tpd, TPD_ADDR(tpd->status));
 			return;
 		}
 	}
@@ -2550,8 +2553,8 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb)
 	}
 
 #ifdef USE_SCATTERGATHER
-	tpd->iovec[slot].addr = pci_map_single(he_dev->pci_dev, skb->data,
-				skb_headlen(skb), PCI_DMA_TODEVICE);
+	tpd->iovec[slot].addr = dma_map_single(&he_dev->pci_dev->dev, skb->data,
+				skb_headlen(skb), DMA_TO_DEVICE);
 	tpd->iovec[slot].len = skb_headlen(skb);
 	++slot;
 
@@ -2579,9 +2582,9 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb)
 			slot = 0;
 		}
 
-		tpd->iovec[slot].addr = pci_map_single(he_dev->pci_dev,
+		tpd->iovec[slot].addr = dma_map_single(&he_dev->pci_dev->dev,
 			(void *) page_address(frag->page) + frag->page_offset,
-				frag->size, PCI_DMA_TODEVICE);
+				frag->size, DMA_TO_DEVICE);
 		tpd->iovec[slot].len = frag->size;
 		++slot;
 
@@ -2589,7 +2592,7 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	tpd->iovec[slot - 1].len |= TPD_LST;
 #else
-	tpd->address0 = pci_map_single(he_dev->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE);
+	tpd->address0 = dma_map_single(&he_dev->pci_dev->dev, skb->data, skb->len, DMA_TO_DEVICE);
 	tpd->length0 = skb->len | TPD_LST;
 #endif
 	tpd->status |= TPD_INT;
diff --git a/drivers/atm/he.h b/drivers/atm/he.h
index 110a27d..f3f5367 100644
--- a/drivers/atm/he.h
+++ b/drivers/atm/he.h
@@ -281,7 +281,7 @@ struct he_dev {
 	int irq_peak;
 
 	struct tasklet_struct tasklet;
-	struct pci_pool *tpd_pool;
+	struct dma_pool *tpd_pool;
 	struct list_head outstanding_tpds;
 
 	dma_addr_t tpdrq_phys;
@@ -296,7 +296,7 @@ struct he_dev {
 	struct he_buff **rbpl_virt;
 	unsigned long *rbpl_table;
 	unsigned long rbpl_hint;
-	struct pci_pool *rbpl_pool;
+	struct dma_pool *rbpl_pool;
 	dma_addr_t rbpl_phys;
 	struct he_rbp *rbpl_base, *rbpl_tail;
 	struct list_head rbpl_outstanding;
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 2b24ed0..074616b 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -641,7 +641,8 @@ alloc_scq(struct idt77252_dev *card, int class)
 	scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL);
 	if (!scq)
 		return NULL;
-	scq->base = pci_zalloc_consistent(card->pcidev, SCQ_SIZE, &scq->paddr);
+	scq->base = dma_zalloc_coherent(&card->pcidev->dev, SCQ_SIZE,
+					&scq->paddr, GFP_KERNEL);
 	if (scq->base == NULL) {
 		kfree(scq);
 		return NULL;
@@ -669,12 +670,12 @@ free_scq(struct idt77252_dev *card, struct scq_info *scq)
 	struct sk_buff *skb;
 	struct atm_vcc *vcc;
 
-	pci_free_consistent(card->pcidev, SCQ_SIZE,
-			    scq->base, scq->paddr);
+	dma_free_coherent(&card->pcidev->dev, SCQ_SIZE,
+			  scq->base, scq->paddr);
 
 	while ((skb = skb_dequeue(&scq->transmit))) {
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->len, PCI_DMA_TODEVICE);
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				 skb->len, DMA_TO_DEVICE);
 
 		vcc = ATM_SKB(skb)->vcc;
 		if (vcc->pop)
@@ -684,8 +685,8 @@ free_scq(struct idt77252_dev *card, struct scq_info *scq)
 	}
 
 	while ((skb = skb_dequeue(&scq->pending))) {
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->len, PCI_DMA_TODEVICE);
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				 skb->len, DMA_TO_DEVICE);
 
 		vcc = ATM_SKB(skb)->vcc;
 		if (vcc->pop)
@@ -800,8 +801,8 @@ drain_scq(struct idt77252_dev *card, struct vc_map *vc)
 	if (skb) {
 		TXPRINTK("%s: freeing skb at %p.\n", card->name, skb);
 
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->len, PCI_DMA_TODEVICE);
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				 skb->len, DMA_TO_DEVICE);
 
 		vcc = ATM_SKB(skb)->vcc;
 
@@ -846,8 +847,8 @@ queue_skb(struct idt77252_dev *card, struct vc_map *vc,
 	tbd = &IDT77252_PRV_TBD(skb);
 	vcc = ATM_SKB(skb)->vcc;
 
-	IDT77252_PRV_PADDR(skb) = pci_map_single(card->pcidev, skb->data,
-						 skb->len, PCI_DMA_TODEVICE);
+	IDT77252_PRV_PADDR(skb) = dma_map_single(&card->pcidev->dev, skb->data,
+						 skb->len, DMA_TO_DEVICE);
 
 	error = -EINVAL;
 
@@ -924,8 +925,8 @@ done:
 	return 0;
 
 errout:
-	pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-			 skb->len, PCI_DMA_TODEVICE);
+	dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+			 skb->len, DMA_TO_DEVICE);
 	return error;
 }
 
@@ -970,8 +971,8 @@ init_rsq(struct idt77252_dev *card)
 {
 	struct rsq_entry *rsqe;
 
-	card->rsq.base = pci_zalloc_consistent(card->pcidev, RSQSIZE,
-					       &card->rsq.paddr);
+	card->rsq.base = dma_zalloc_coherent(&card->pcidev->dev, RSQSIZE,
+					     &card->rsq.paddr, GFP_KERNEL);
 	if (card->rsq.base == NULL) {
 		printk("%s: can't allocate RSQ.\n", card->name);
 		return -1;
@@ -1001,8 +1002,8 @@ init_rsq(struct idt77252_dev *card)
 static void
 deinit_rsq(struct idt77252_dev *card)
 {
-	pci_free_consistent(card->pcidev, RSQSIZE,
-			    card->rsq.base, card->rsq.paddr);
+	dma_free_coherent(&card->pcidev->dev, RSQSIZE,
+			  card->rsq.base, card->rsq.paddr);
 }
 
 static void
@@ -1057,9 +1058,9 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 
 	vcc = vc->rx_vcc;
 
-	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(skb),
-				    skb_end_pointer(skb) - skb->data,
-				    PCI_DMA_FROMDEVICE);
+	dma_sync_single_for_cpu(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				skb_end_pointer(skb) - skb->data,
+				DMA_FROM_DEVICE);
 
 	if ((vcc->qos.aal == ATM_AAL0) ||
 	    (vcc->qos.aal == ATM_AAL34)) {
@@ -1180,9 +1181,9 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 			return;
 		}
 
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
 				 skb_end_pointer(skb) - skb->data,
-				 PCI_DMA_FROMDEVICE);
+				 DMA_FROM_DEVICE);
 		sb_pool_remove(card, skb);
 
 		skb_trim(skb, len);
@@ -1254,9 +1255,9 @@ idt77252_rx_raw(struct idt77252_dev *card)
 	head = IDT77252_PRV_PADDR(queue) + (queue->data - queue->head - 16);
 	tail = readl(SAR_REG_RAWCT);
 
-	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(queue),
-				    skb_end_offset(queue) - 16,
-				    PCI_DMA_FROMDEVICE);
+	dma_sync_single_for_cpu(&card->pcidev->dev, IDT77252_PRV_PADDR(queue),
+				skb_end_offset(queue) - 16,
+				DMA_FROM_DEVICE);
 
 	while (head != tail) {
 		unsigned int vpi, vci;
@@ -1348,11 +1349,11 @@ drop:
 			if (next) {
 				card->raw_cell_head = next;
 				queue = card->raw_cell_head;
-				pci_dma_sync_single_for_cpu(card->pcidev,
-							    IDT77252_PRV_PADDR(queue),
-							    (skb_end_pointer(queue) -
-							     queue->data),
-							    PCI_DMA_FROMDEVICE);
+				dma_sync_single_for_cpu(&card->pcidev->dev,
+							IDT77252_PRV_PADDR(queue),
+							(skb_end_pointer(queue) -
+							 queue->data),
+							DMA_FROM_DEVICE);
 			} else {
 				card->raw_cell_head = NULL;
 				printk("%s: raw cell queue overrun\n",
@@ -1375,8 +1376,8 @@ init_tsq(struct idt77252_dev *card)
 {
 	struct tsq_entry *tsqe;
 
-	card->tsq.base = pci_alloc_consistent(card->pcidev, RSQSIZE,
-					      &card->tsq.paddr);
+	card->tsq.base = dma_alloc_coherent(&card->pcidev->dev, TSQSIZE,
+					    &card->tsq.paddr, GFP_KERNEL);
 	if (card->tsq.base == NULL) {
 		printk("%s: can't allocate TSQ.\n", card->name);
 		return -1;
@@ -1398,8 +1399,8 @@ init_tsq(struct idt77252_dev *card)
 static void
 deinit_tsq(struct idt77252_dev *card)
 {
-	pci_free_consistent(card->pcidev, TSQSIZE,
-			    card->tsq.base, card->tsq.paddr);
+	dma_free_coherent(&card->pcidev->dev, TSQSIZE,
+			  card->tsq.base, card->tsq.paddr);
 }
 
 static void
@@ -1861,9 +1862,9 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 			goto outfree;
 		}
 
-		paddr = pci_map_single(card->pcidev, skb->data,
+		paddr = dma_map_single(&card->pcidev->dev, skb->data,
 				       skb_end_pointer(skb) - skb->data,
-				       PCI_DMA_FROMDEVICE);
+				       DMA_FROM_DEVICE);
 		IDT77252_PRV_PADDR(skb) = paddr;
 
 		if (push_rx_skb(card, skb, queue)) {
@@ -1875,8 +1876,8 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 	return;
 
 outunmap:
-	pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-			 skb_end_pointer(skb) - skb->data, PCI_DMA_FROMDEVICE);
+	dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+			 skb_end_pointer(skb) - skb->data, DMA_FROM_DEVICE);
 
 	handle = IDT77252_PRV_POOL(skb);
 	card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL;
@@ -1892,15 +1893,15 @@ recycle_rx_skb(struct idt77252_dev *card, struct sk_buff *skb)
 	u32 handle = IDT77252_PRV_POOL(skb);
 	int err;
 
-	pci_dma_sync_single_for_device(card->pcidev, IDT77252_PRV_PADDR(skb),
-				       skb_end_pointer(skb) - skb->data,
-				       PCI_DMA_FROMDEVICE);
+	dma_sync_single_for_device(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				   skb_end_pointer(skb) - skb->data,
+				   DMA_FROM_DEVICE);
 
 	err = push_rx_skb(card, skb, POOL_QUEUE(handle));
 	if (err) {
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
 				 skb_end_pointer(skb) - skb->data,
-				 PCI_DMA_FROMDEVICE);
+				 DMA_FROM_DEVICE);
 		sb_pool_remove(card, skb);
 		dev_kfree_skb(skb);
 	}
@@ -3058,11 +3059,11 @@ deinit_card(struct idt77252_dev *card)
 		for (j = 0; j < FBQ_SIZE; j++) {
 			skb = card->sbpool[i].skb[j];
 			if (skb) {
-				pci_unmap_single(card->pcidev,
+				dma_unmap_single(&card->pcidev->dev,
 						 IDT77252_PRV_PADDR(skb),
 						 (skb_end_pointer(skb) -
 						  skb->data),
-						 PCI_DMA_FROMDEVICE);
+						 DMA_FROM_DEVICE);
 				card->sbpool[i].skb[j] = NULL;
 				dev_kfree_skb(skb);
 			}
@@ -3076,8 +3077,8 @@ deinit_card(struct idt77252_dev *card)
 	vfree(card->vcs);
 
 	if (card->raw_cell_hnd) {
-		pci_free_consistent(card->pcidev, 2 * sizeof(u32),
-				    card->raw_cell_hnd, card->raw_cell_paddr);
+		dma_free_coherent(&card->pcidev->dev, 2 * sizeof(u32),
+				  card->raw_cell_hnd, card->raw_cell_paddr);
 	}
 
 	if (card->rsq.base) {
@@ -3397,9 +3398,10 @@ static int init_card(struct atm_dev *dev)
 	writel(0, SAR_REG_GP);
 
 	/* Initialize RAW Cell Handle Register  */
-	card->raw_cell_hnd = pci_zalloc_consistent(card->pcidev,
-						   2 * sizeof(u32),
-						   &card->raw_cell_paddr);
+	card->raw_cell_hnd = dma_zalloc_coherent(&card->pcidev->dev,
+						 2 * sizeof(u32),
+						 &card->raw_cell_paddr,
+						 GFP_KERNEL);
 	if (!card->raw_cell_hnd) {
 		printk("%s: memory allocation failure.\n", card->name);
 		deinit_card(card);
@@ -3611,6 +3613,11 @@ static int idt77252_init_one(struct pci_dev *pcidev,
 		return err;
 	}
 
+	if ((err = dma_set_mask_and_coherent(&pcidev->dev, DMA_BIT_MASK(32)))) {
+		printk("idt77252: can't enable DMA for PCI device at %s\n", pci_name(pcidev));
+		return err;
+	}
+
 	card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL);
 	if (!card) {
 		printk("idt77252-%d: can't allocate private data\n", index);
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index 4217f29..924f8e2 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -1185,8 +1185,8 @@ static int rx_pkt(struct atm_dev *dev)
 
 	/* Build the DLE structure */  
 	wr_ptr = iadev->rx_dle_q.write;  
-	wr_ptr->sys_pkt_addr = pci_map_single(iadev->pci, skb->data,
-		len, PCI_DMA_FROMDEVICE);
+	wr_ptr->sys_pkt_addr = dma_map_single(&iadev->pci->dev, skb->data,
+					      len, DMA_FROM_DEVICE);
 	wr_ptr->local_pkt_addr = buf_addr;  
 	wr_ptr->bytes = len;	/* We don't know this do we ?? */  
 	wr_ptr->mode = DMA_INT_ENABLE;  
@@ -1306,8 +1306,8 @@ static void rx_dle_intr(struct atm_dev *dev)
           u_short length;
           struct ia_vcc *ia_vcc;
 
-	  pci_unmap_single(iadev->pci, iadev->rx_dle_q.write->sys_pkt_addr,
-	  	len, PCI_DMA_FROMDEVICE);
+	  dma_unmap_single(&iadev->pci->dev, iadev->rx_dle_q.write->sys_pkt_addr,
+			   len, DMA_FROM_DEVICE);
           /* no VCC related housekeeping done as yet. lets see */  
           vcc = ATM_SKB(skb)->vcc;
 	  if (!vcc) {
@@ -1430,8 +1430,8 @@ static int rx_init(struct atm_dev *dev)
   //    spin_lock_init(&iadev->rx_lock); 
   
 	/* Allocate 4k bytes - more aligned than needed (4k boundary) */
-	dle_addr = pci_alloc_consistent(iadev->pci, DLE_TOTAL_SIZE,
-					&iadev->rx_dle_dma);  
+	dle_addr = dma_alloc_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE,
+				      &iadev->rx_dle_dma, GFP_KERNEL);
 	if (!dle_addr)  {  
 		printk(KERN_ERR DEV_LABEL "can't allocate DLEs\n");
 		goto err_out;
@@ -1631,8 +1631,8 @@ static int rx_init(struct atm_dev *dev)
 	return 0;  
 
 err_free_dle:
-	pci_free_consistent(iadev->pci, DLE_TOTAL_SIZE, iadev->rx_dle_q.start,
-			    iadev->rx_dle_dma);  
+	dma_free_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE, iadev->rx_dle_q.start,
+			  iadev->rx_dle_dma);
 err_out:
 	return -ENOMEM;
 }  
@@ -1702,8 +1702,8 @@ static void tx_dle_intr(struct atm_dev *dev)
 
 	    /* Revenge of the 2 dle (skb + trailer) used in ia_pkt_tx() */
 	    if (!((dle - iadev->tx_dle_q.start)%(2*sizeof(struct dle)))) {
-		pci_unmap_single(iadev->pci, dle->sys_pkt_addr, skb->len,
-				 PCI_DMA_TODEVICE);
+		dma_unmap_single(&iadev->pci->dev, dle->sys_pkt_addr, skb->len,
+				 DMA_TO_DEVICE);
 	    }
             vcc = ATM_SKB(skb)->vcc;
             if (!vcc) {
@@ -1917,8 +1917,8 @@ static int tx_init(struct atm_dev *dev)
                                 readw(iadev->seg_reg+SEG_MASK_REG));)  
 
 	/* Allocate 4k (boundary aligned) bytes */
-	dle_addr = pci_alloc_consistent(iadev->pci, DLE_TOTAL_SIZE,
-					&iadev->tx_dle_dma);  
+	dle_addr = dma_alloc_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE,
+				      &iadev->tx_dle_dma, GFP_KERNEL);
 	if (!dle_addr)  {
 		printk(KERN_ERR DEV_LABEL "can't allocate DLEs\n");
 		goto err_out;
@@ -1989,8 +1989,10 @@ static int tx_init(struct atm_dev *dev)
 		goto err_free_tx_bufs;
             }
 	    iadev->tx_buf[i].cpcs = cpcs;
-	    iadev->tx_buf[i].dma_addr = pci_map_single(iadev->pci,
-		cpcs, sizeof(*cpcs), PCI_DMA_TODEVICE);
+	    iadev->tx_buf[i].dma_addr = dma_map_single(&iadev->pci->dev,
+						       cpcs,
+						       sizeof(*cpcs),
+						       DMA_TO_DEVICE);
         }
         iadev->desc_tbl = kmalloc(iadev->num_tx_desc *
                                    sizeof(struct desc_tbl_t), GFP_KERNEL);
@@ -2198,14 +2200,14 @@ err_free_tx_bufs:
 	while (--i >= 0) {
 		struct cpcs_trailer_desc *desc = iadev->tx_buf + i;
 
-		pci_unmap_single(iadev->pci, desc->dma_addr,
-			sizeof(*desc->cpcs), PCI_DMA_TODEVICE);
+		dma_unmap_single(&iadev->pci->dev, desc->dma_addr,
+				 sizeof(*desc->cpcs), DMA_TO_DEVICE);
 		kfree(desc->cpcs);
 	}
 	kfree(iadev->tx_buf);
 err_free_dle:
-	pci_free_consistent(iadev->pci, DLE_TOTAL_SIZE, iadev->tx_dle_q.start,
-			    iadev->tx_dle_dma);  
+	dma_free_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE, iadev->tx_dle_q.start,
+			  iadev->tx_dle_dma);
 err_out:
 	return -ENOMEM;
 }   
@@ -2476,20 +2478,20 @@ static void ia_free_tx(IADEV *iadev)
 	for (i = 0; i < iadev->num_tx_desc; i++) {
 		struct cpcs_trailer_desc *desc = iadev->tx_buf + i;
 
-		pci_unmap_single(iadev->pci, desc->dma_addr,
-			sizeof(*desc->cpcs), PCI_DMA_TODEVICE);
+		dma_unmap_single(&iadev->pci->dev, desc->dma_addr,
+				 sizeof(*desc->cpcs), DMA_TO_DEVICE);
 		kfree(desc->cpcs);
 	}
 	kfree(iadev->tx_buf);
-	pci_free_consistent(iadev->pci, DLE_TOTAL_SIZE, iadev->tx_dle_q.start,
-			    iadev->tx_dle_dma);  
+	dma_free_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE, iadev->tx_dle_q.start,
+			  iadev->tx_dle_dma);
 }
 
 static void ia_free_rx(IADEV *iadev)
 {
 	kfree(iadev->rx_open);
-	pci_free_consistent(iadev->pci, DLE_TOTAL_SIZE, iadev->rx_dle_q.start,
-			  iadev->rx_dle_dma);  
+	dma_free_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE, iadev->rx_dle_q.start,
+			  iadev->rx_dle_dma);
 }
 
 static int ia_start(struct atm_dev *dev)
@@ -3009,8 +3011,8 @@ static int ia_pkt_tx (struct atm_vcc *vcc, struct sk_buff *skb) {
 	/* Build the DLE structure */  
 	wr_ptr = iadev->tx_dle_q.write;  
 	memset((caddr_t)wr_ptr, 0, sizeof(*wr_ptr));  
-	wr_ptr->sys_pkt_addr = pci_map_single(iadev->pci, skb->data,
-		skb->len, PCI_DMA_TODEVICE);
+	wr_ptr->sys_pkt_addr = dma_map_single(&iadev->pci->dev, skb->data,
+					      skb->len, DMA_TO_DEVICE);
 	wr_ptr->local_pkt_addr = (buf_desc_ptr->buf_start_hi << 16) | 
                                                   buf_desc_ptr->buf_start_lo;  
 	/* wr_ptr->bytes = swap_byte_order(total_len); didn't seem to affect?? */
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c
index 93eaf8d..02fae18 100644
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -346,7 +346,8 @@ static void lanai_buf_allocate(struct lanai_buffer *buf,
 		 * everything, but the way the lanai uses DMA memory would
 		 * make that a terrific pain.  This is much simpler.
 		 */
-		buf->start = pci_alloc_consistent(pci, size, &buf->dmaaddr);
+		buf->start = dma_alloc_coherent(&pci->dev,
+						size, &buf->dmaaddr, GFP_KERNEL);
 		if (buf->start != NULL) {	/* Success */
 			/* Lanai requires 256-byte alignment of DMA bufs */
 			APRINTK((buf->dmaaddr & ~0xFFFFFF00) == 0,
@@ -372,8 +373,8 @@ static void lanai_buf_deallocate(struct lanai_buffer *buf,
 	struct pci_dev *pci)
 {
 	if (buf->start != NULL) {
-		pci_free_consistent(pci, lanai_buf_size(buf),
-		    buf->start, buf->dmaaddr);
+		dma_free_coherent(&pci->dev, lanai_buf_size(buf),
+				  buf->start, buf->dmaaddr);
 		buf->start = buf->end = buf->ptr = NULL;
 	}
 }
@@ -1953,12 +1954,7 @@ static int lanai_pci_start(struct lanai_dev *lanai)
 		return -ENXIO;
 	}
 	pci_set_master(pci);
-	if (pci_set_dma_mask(pci, DMA_BIT_MASK(32)) != 0) {
-		printk(KERN_WARNING DEV_LABEL
-		    "(itf %d): No suitable DMA available.\n", lanai->number);
-		return -EBUSY;
-	}
-	if (pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32)) != 0) {
+	if (dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32)) != 0) {
 		printk(KERN_WARNING DEV_LABEL
 		    "(itf %d): No suitable DMA available.\n", lanai->number);
 		return -EBUSY;
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 9988ac9..b7e1cc0 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -252,10 +252,10 @@ static void nicstar_remove_one(struct pci_dev *pcidev)
 			free_scq(card, card->scd2vc[j]->scq, card->scd2vc[j]->tx_vcc);
 	}
 	idr_destroy(&card->idr);
-	pci_free_consistent(card->pcidev, NS_RSQSIZE + NS_RSQ_ALIGNMENT,
-			    card->rsq.org, card->rsq.dma);
-	pci_free_consistent(card->pcidev, NS_TSQSIZE + NS_TSQ_ALIGNMENT,
-			    card->tsq.org, card->tsq.dma);
+	dma_free_coherent(&card->pcidev->dev, NS_RSQSIZE + NS_RSQ_ALIGNMENT,
+			  card->rsq.org, card->rsq.dma);
+	dma_free_coherent(&card->pcidev->dev, NS_TSQSIZE + NS_TSQ_ALIGNMENT,
+			  card->tsq.org, card->tsq.dma);
 	free_irq(card->pcidev->irq, card);
 	iounmap(card->membase);
 	kfree(card);
@@ -370,8 +370,7 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
 		ns_init_card_error(card, error);
 		return error;
 	}
-        if ((pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)) != 0) ||
-	    (pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32)) != 0)) {
+        if (dma_set_mask_and_coherent(&pcidev->dev, DMA_BIT_MASK(32)) != 0) {
                 printk(KERN_WARNING
 		       "nicstar%d: No suitable DMA available.\n", i);
 		error = 2;
@@ -535,9 +534,9 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
 	writel(0x00000000, card->membase + VPM);
 
 	/* Initialize TSQ */
-	card->tsq.org = pci_alloc_consistent(card->pcidev,
-					     NS_TSQSIZE + NS_TSQ_ALIGNMENT,
-					     &card->tsq.dma);
+	card->tsq.org = dma_alloc_coherent(&card->pcidev->dev,
+					   NS_TSQSIZE + NS_TSQ_ALIGNMENT,
+					   &card->tsq.dma, GFP_KERNEL);
 	if (card->tsq.org == NULL) {
 		printk("nicstar%d: can't allocate TSQ.\n", i);
 		error = 10;
@@ -554,9 +553,9 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
 	PRINTK("nicstar%d: TSQ base at 0x%p.\n", i, card->tsq.base);
 
 	/* Initialize RSQ */
-	card->rsq.org = pci_alloc_consistent(card->pcidev,
-					     NS_RSQSIZE + NS_RSQ_ALIGNMENT,
-					     &card->rsq.dma);
+	card->rsq.org = dma_alloc_coherent(&card->pcidev->dev,
+					   NS_RSQSIZE + NS_RSQ_ALIGNMENT,
+					   &card->rsq.dma, GFP_KERNEL);
 	if (card->rsq.org == NULL) {
 		printk("nicstar%d: can't allocate RSQ.\n", i);
 		error = 11;
@@ -874,7 +873,8 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd)
 	scq = kmalloc(sizeof(scq_info), GFP_KERNEL);
 	if (!scq)
 		return NULL;
-        scq->org = pci_alloc_consistent(card->pcidev, 2 * size, &scq->dma);
+        scq->org = dma_alloc_coherent(&card->pcidev->dev,
+				      2 * size,  &scq->dma, GFP_KERNEL);
 	if (!scq->org) {
 		kfree(scq);
 		return NULL;
@@ -936,10 +936,10 @@ static void free_scq(ns_dev *card, scq_info *scq, struct atm_vcc *vcc)
 			}
 	}
 	kfree(scq->skb);
-	pci_free_consistent(card->pcidev,
-			    2 * (scq->num_entries == VBR_SCQ_NUM_ENTRIES ?
-				 VBR_SCQSIZE : CBR_SCQSIZE),
-			    scq->org, scq->dma);
+	dma_free_coherent(&card->pcidev->dev,
+			  2 * (scq->num_entries == VBR_SCQ_NUM_ENTRIES ?
+			       VBR_SCQSIZE : CBR_SCQSIZE),
+			  scq->org, scq->dma);
 	kfree(scq);
 }
 
@@ -957,11 +957,11 @@ static void push_rxbufs(ns_dev * card, struct sk_buff *skb)
 	handle2 = NULL;
 	addr2 = 0;
 	handle1 = skb;
-	addr1 = pci_map_single(card->pcidev,
+	addr1 = dma_map_single(&card->pcidev->dev,
 			       skb->data,
 			       (NS_PRV_BUFTYPE(skb) == BUF_SM
 				? NS_SMSKBSIZE : NS_LGSKBSIZE),
-			       PCI_DMA_TODEVICE);
+			       DMA_TO_DEVICE);
 	NS_PRV_DMA(skb) = addr1; /* save so we can unmap later */
 
 #ifdef GENERAL_DEBUG
@@ -1670,8 +1670,8 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	ATM_SKB(skb)->vcc = vcc;
 
-	NS_PRV_DMA(skb) = pci_map_single(card->pcidev, skb->data,
-					 skb->len, PCI_DMA_TODEVICE);
+	NS_PRV_DMA(skb) = dma_map_single(&card->pcidev->dev, skb->data,
+					 skb->len, DMA_TO_DEVICE);
 
 	if (vcc->qos.aal == ATM_AAL5) {
 		buflen = (skb->len + 47 + 8) / 48 * 48;	/* Multiple of 48 */
@@ -1930,10 +1930,10 @@ static void drain_scq(ns_dev * card, scq_info * scq, int pos)
 		XPRINTK("nicstar%d: freeing skb at 0x%p (index %d).\n",
 			card->index, skb, i);
 		if (skb != NULL) {
-			pci_unmap_single(card->pcidev,
+			dma_unmap_single(&card->pcidev->dev,
 					 NS_PRV_DMA(skb),
 					 skb->len,
-					 PCI_DMA_TODEVICE);
+					 DMA_TO_DEVICE);
 			vcc = ATM_SKB(skb)->vcc;
 			if (vcc && vcc->pop != NULL) {
 				vcc->pop(vcc, skb);
@@ -1992,16 +1992,16 @@ static void dequeue_rx(ns_dev * card, ns_rsqe * rsqe)
 		return;
 	}
 	idr_remove(&card->idr, id);
-        pci_dma_sync_single_for_cpu(card->pcidev,
-				    NS_PRV_DMA(skb),
-				    (NS_PRV_BUFTYPE(skb) == BUF_SM
-				     ? NS_SMSKBSIZE : NS_LGSKBSIZE),
-				    PCI_DMA_FROMDEVICE);
-	pci_unmap_single(card->pcidev,
+	dma_sync_single_for_cpu(&card->pcidev->dev,
+				NS_PRV_DMA(skb),
+				(NS_PRV_BUFTYPE(skb) == BUF_SM
+				 ? NS_SMSKBSIZE : NS_LGSKBSIZE),
+				DMA_FROM_DEVICE);
+	dma_unmap_single(&card->pcidev->dev,
 			 NS_PRV_DMA(skb),
 			 (NS_PRV_BUFTYPE(skb) == BUF_SM
 			  ? NS_SMSKBSIZE : NS_LGSKBSIZE),
-			 PCI_DMA_FROMDEVICE);
+			 DMA_FROM_DEVICE);
 	vpi = ns_rsqe_vpi(rsqe);
 	vci = ns_rsqe_vci(rsqe);
 	if (vpi >= 1UL << card->vpibits || vci >= 1UL << card->vcibits) {
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 21b0bc6..74e18b0 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -785,8 +785,8 @@ static void solos_bh(unsigned long card_arg)
 				skb = card->rx_skb[port];
 				card->rx_skb[port] = NULL;
 
-				pci_unmap_single(card->dev, SKB_CB(skb)->dma_addr,
-						 RX_DMA_SIZE, PCI_DMA_FROMDEVICE);
+				dma_unmap_single(&card->dev->dev, SKB_CB(skb)->dma_addr,
+						 RX_DMA_SIZE, DMA_FROM_DEVICE);
 
 				header = (void *)skb->data;
 				size = le16_to_cpu(header->size);
@@ -872,8 +872,8 @@ static void solos_bh(unsigned long card_arg)
 			struct sk_buff *skb = alloc_skb(RX_DMA_SIZE, GFP_ATOMIC);
 			if (skb) {
 				SKB_CB(skb)->dma_addr =
-					pci_map_single(card->dev, skb->data,
-						       RX_DMA_SIZE, PCI_DMA_FROMDEVICE);
+					dma_map_single(&card->dev->dev, skb->data,
+						       RX_DMA_SIZE, DMA_FROM_DEVICE);
 				iowrite32(SKB_CB(skb)->dma_addr,
 					  card->config_regs + RX_DMA_ADDR(port));
 				card->rx_skb[port] = skb;
@@ -1069,8 +1069,8 @@ static uint32_t fpga_tx(struct solos_card *card)
 		if (tx_pending & 1) {
 			struct sk_buff *oldskb = card->tx_skb[port];
 			if (oldskb) {
-				pci_unmap_single(card->dev, SKB_CB(oldskb)->dma_addr,
-						 oldskb->len, PCI_DMA_TODEVICE);
+				dma_unmap_single(&card->dev->dev, SKB_CB(oldskb)->dma_addr,
+						 oldskb->len, DMA_TO_DEVICE);
 				card->tx_skb[port] = NULL;
 			}
 			spin_lock(&card->tx_queue_lock);
@@ -1089,8 +1089,8 @@ static uint32_t fpga_tx(struct solos_card *card)
 					data = card->dma_bounce + (BUF_SIZE * port);
 					memcpy(data, skb->data, skb->len);
 				}
-				SKB_CB(skb)->dma_addr = pci_map_single(card->dev, data,
-								       skb->len, PCI_DMA_TODEVICE);
+				SKB_CB(skb)->dma_addr = dma_map_single(&card->dev->dev, data,
+								       skb->len, DMA_TO_DEVICE);
 				card->tx_skb[port] = skb;
 				iowrite32(SKB_CB(skb)->dma_addr,
 					  card->config_regs + TX_DMA_ADDR(port));
@@ -1210,7 +1210,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		goto out;
 	}
 
-	err = pci_set_dma_mask(dev, DMA_BIT_MASK(32));
+	err = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32));
 	if (err) {
 		dev_warn(&dev->dev, "Failed to set 32-bit DMA mask\n");
 		goto out;
@@ -1411,14 +1411,14 @@ static void atm_remove(struct solos_card *card)
 
 			skb = card->rx_skb[i];
 			if (skb) {
-				pci_unmap_single(card->dev, SKB_CB(skb)->dma_addr,
-						 RX_DMA_SIZE, PCI_DMA_FROMDEVICE);
+				dma_unmap_single(&card->dev->dev, SKB_CB(skb)->dma_addr,
+						 RX_DMA_SIZE, DMA_FROM_DEVICE);
 				dev_kfree_skb(skb);
 			}
 			skb = card->tx_skb[i];
 			if (skb) {
-				pci_unmap_single(card->dev, SKB_CB(skb)->dma_addr,
-						 skb->len, PCI_DMA_TODEVICE);
+				dma_unmap_single(&card->dev->dev, SKB_CB(skb)->dma_addr,
+						 skb->len, DMA_TO_DEVICE);
 				dev_kfree_skb(skb);
 			}
 			while ((skb = skb_dequeue(&card->tx_queue[i])))
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 969c3c2..cecfb94 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1306,19 +1306,20 @@ static int zatm_start(struct atm_dev *dev)
 
 		if (!mbx_entries[i])
 			continue;
-		mbx = pci_alloc_consistent(pdev, 2*MBX_SIZE(i), &mbx_dma);
+		mbx = dma_alloc_coherent(&pdev->dev,
+					 2 * MBX_SIZE(i), &mbx_dma, GFP_KERNEL);
 		if (!mbx) {
 			error = -ENOMEM;
 			goto out;
 		}
 		/*
-		 * Alignment provided by pci_alloc_consistent() isn't enough
+		 * Alignment provided by dma_alloc_coherent() isn't enough
 		 * for this device.
 		 */
 		if (((unsigned long)mbx ^ mbx_dma) & 0xffff) {
 			printk(KERN_ERR DEV_LABEL "(itf %d): system "
 			       "bus incompatible with driver\n", dev->number);
-			pci_free_consistent(pdev, 2*MBX_SIZE(i), mbx, mbx_dma);
+			dma_free_coherent(&pdev->dev, 2*MBX_SIZE(i), mbx, mbx_dma);
 			error = -ENODEV;
 			goto out;
 		}
@@ -1354,9 +1355,9 @@ out_tx:
 	kfree(zatm_dev->tx_map);
 out:
 	while (i-- > 0) {
-		pci_free_consistent(pdev, 2*MBX_SIZE(i), 
-				    (void *)zatm_dev->mbx_start[i],
-				    zatm_dev->mbx_dma[i]);
+		dma_free_coherent(&pdev->dev, 2 * MBX_SIZE(i),
+				  (void *)zatm_dev->mbx_start[i],
+				  zatm_dev->mbx_dma[i]);
 	}
 	free_irq(zatm_dev->irq, dev);
 	goto done;
@@ -1608,6 +1609,10 @@ static int zatm_init_one(struct pci_dev *pci_dev,
 	if (ret < 0)
 		goto out_disable;
 
+	ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
+	if (ret < 0)
+		goto out_disable;
+
 	zatm_dev->pci_dev = pci_dev;
 	dev->dev_data = zatm_dev;
 	zatm_dev->copper = (int)ent->driver_data;
-- 
1.9.3

^ permalink raw reply related

* [PATCH 2/2] Drivers: Isdn: sc: Fixed coding style & spelling mistakes.
From: Akash Shende @ 2015-01-16 13:42 UTC (permalink / raw)
  To: isdn; +Cc: netdev, linux-kernel

Fix some spelling mistakes and coding style issues, and don't explicitly initialize a static variable to 0.

Signed-off-by: Akash Shende <akash0x53s@gmail.com>
---
 drivers/isdn/sc/init.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c
index d6f19b1..3597ef4 100644
--- a/drivers/isdn/sc/init.c
+++ b/drivers/isdn/sc/init.c
@@ -30,7 +30,7 @@ static const char *boardname[] = { "DataCommute/BRI", "DataCommute/PRI", "TeleCo
 static unsigned int io[] = {0, 0, 0, 0};
 static unsigned char irq[] = {0, 0, 0, 0};
 static unsigned long ram[] = {0, 0, 0, 0};
-static bool do_reset = 0;
+static bool do_reset;
 
 module_param_array(io, int, NULL, 0);
 module_param_array(irq, byte, NULL, 0);
@@ -104,13 +104,12 @@ static int __init sc_init(void)
 					 io[b] + 0x400 * EXP_PAGE0);
 				continue;
 			}
-		}
-		else {
+		} else {
 			/*
 			 * Yes, probe for I/O Base
 			 */
 			if (probe_exhasted) {
-				pr_debug("All probe addresses exhasted, skipping\n");
+				pr_debug("All probe addresses exhausted, skipping\n");
 				continue;
 			}
 			pr_debug("Probing for I/O...\n");
@@ -169,8 +168,7 @@ static int __init sc_init(void)
 				model = identify_board(ram[b], io[b]);
 				release_region(ram[b], SRAM_PAGESIZE);
 			}
-		}
-		else {
+		} else {
 			/*
 			 * Yes, probe for free RAM and look for
 			 * a signature and id the board model
@@ -187,7 +185,7 @@ static int __init sc_init(void)
 						ram[b] = i;
 						break;
 					}
-					pr_debug("  Unidentifed or inaccessible\n");
+					pr_debug("  Unidentified or inaccessible\n");
 					continue;
 				}
 				pr_debug("  request failed\n");
@@ -337,8 +335,7 @@ static int __init sc_init(void)
 		sc_adapter[cinst]->interrupt = irq[b];
 		if (request_irq(sc_adapter[cinst]->interrupt, interrupt_handler,
 				0, interface->id,
-				(void *)(unsigned long) cinst))
-		{
+				(void *)(unsigned long) cinst)) {
 			kfree(sc_adapter[cinst]->channel);
 			indicate_status(cinst, ISDN_STAT_UNLOAD, 0, NULL);	/* Fix me */
 			kfree(interface);
-- 
2.2.2

^ permalink raw reply related

* [PATCH net-next] niu: remove one compound_head() call
From: Eric Dumazet @ 2015-01-16 13:39 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

From: Eric Dumazet <edumazet@google.com>

After "page = alloc_page(mask);", we do not need to use
compound_head(): page already points to the right place.

This would be true even if using alloc_pages().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 drivers/net/ethernet/sun/niu.c |    3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 0c6416213837..4b51f903fb73 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -3341,8 +3341,7 @@ static int niu_rbr_add_page(struct niu *np, struct rx_ring_info *rp,
 
 	niu_hash_page(rp, page, addr);
 	if (rp->rbr_blocks_per_page > 1)
-		atomic_add(rp->rbr_blocks_per_page - 1,
-			   &compound_head(page)->_count);
+		atomic_add(rp->rbr_blocks_per_page - 1, &page->_count);
 
 	for (i = 0; i < rp->rbr_blocks_per_page; i++) {
 		__le32 *rbr = &rp->rbr[start_index + i];

^ permalink raw reply related

* [PATCH net-next] socket: use ki_nbytes instead of iov_length()
From: Nicolas Dichtel @ 2015-01-16 13:35 UTC (permalink / raw)
  To: viro; +Cc: netdev, davem, Nicolas Dichtel
In-Reply-To: <20150115214519.GB29656@ZenIV.linux.org.uk>

This field already contains the length of the iovec, no need to calculate it
again.

Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 net/socket.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index e1278d7e1d5d..9fcf15b61009 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -882,16 +882,15 @@ static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
 		unsigned long nr_segs)
 {
 	struct socket *sock = file->private_data;
-	size_t size = iov_length(iov, nr_segs);
 
 	msg->msg_name = NULL;
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, size);
+	iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, iocb->ki_nbytes);
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 
-	return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
+	return __sock_recvmsg(iocb, sock, msg, iocb->ki_nbytes, msg->msg_flags);
 }
 
 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
@@ -917,18 +916,17 @@ static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
 			unsigned long nr_segs)
 {
 	struct socket *sock = file->private_data;
-	size_t size = iov_length(iov, nr_segs);
 
 	msg->msg_name = NULL;
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, size);
+	iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes);
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 	if (sock->type == SOCK_SEQPACKET)
 		msg->msg_flags |= MSG_EOR;
 
-	return __sock_sendmsg(iocb, sock, msg, size);
+	return __sock_sendmsg(iocb, sock, msg, iocb->ki_nbytes);
 }
 
 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
-- 
2.2.2

^ permalink raw reply related

* Re: [PATCH net-next] socket: use iov_length()
From: Nicolas Dichtel @ 2015-01-16 13:31 UTC (permalink / raw)
  To: Al Viro; +Cc: netdev, davem
In-Reply-To: <20150115214519.GB29656@ZenIV.linux.org.uk>

Le 15/01/2015 22:45, Al Viro a écrit :
> On Wed, Jan 14, 2015 at 11:07:50AM +0100, Nicolas Dichtel wrote:
>> Better to use available helpers.
>>
>> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
>> ---
>>   net/socket.c | 8 ++------
>>   1 file changed, 2 insertions(+), 6 deletions(-)
>>
>> diff --git a/net/socket.c b/net/socket.c
>> index a2c33a4dc7ba..b1f3fa4da020 100644
>> --- a/net/socket.c
>> +++ b/net/socket.c
>> @@ -883,10 +883,8 @@ static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
>>   {
>>   	struct socket *sock = file->private_data;
>>   	size_t size = 0;
>> -	int i;
>>
>> -	for (i = 0; i < nr_segs; i++)
>> -		size += iov[i].iov_len;
>> +	size = iov_length(iov, nr_segs);
>
> What's wrong with size = iocb->ki_nbytes instead of calling anything?
>
I'm not an expert, but it seems you're right. I will send a patch.

^ permalink raw reply

* [net-next v2 12/17] i40e: AQ API updates
From: Jeff Kirsher @ 2015-01-16 13:29 UTC (permalink / raw)
  To: davem
  Cc: Jeff Kirsher, netdev, nhorman, sassmann, jogreene, gerlitz.or,
	Shannon Nelson, Kamil Krawczyk
In-Reply-To: <1421414946-22179-1-git-send-email-jeffrey.t.kirsher@intel.com>

Fix up NVM config read and write data structs.

Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Kamil Krawczyk <kamil.krawczyk@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h   | 10 +++++-----
 drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 20cada5..70b8e55 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1831,12 +1831,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_update);
 /* NVM Config Read (indirect 0x0704) */
 struct i40e_aqc_nvm_config_read {
 	__le16	cmd_flags;
-#define ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK	1
-#define ANVM_READ_SINGLE_FEATURE		0
-#define ANVM_READ_MULTIPLE_FEATURES		1
+#define I40E_AQ_ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK	1 
+#define I40E_AQ_ANVM_READ_SINGLE_FEATURE		0 
+#define I40E_AQ_ANVM_READ_MULTIPLE_FEATURES		1
 	__le16	element_count;
-	__le16	element_id; /* Feature/field ID */
-	u8	reserved[2];
+	__le16	element_id;	/* Feature/field ID */
+	__le16	element_id_msw;	/* MSWord of field ID */
 	__le32	address_high;
 	__le32	address_low;
 };
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 1b80846..5363cbf 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -1831,12 +1831,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_update);
 /* NVM Config Read (indirect 0x0704) */
 struct i40e_aqc_nvm_config_read {
 	__le16	cmd_flags;
-#define ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK	1
-#define ANVM_READ_SINGLE_FEATURE		0
-#define ANVM_READ_MULTIPLE_FEATURES		1
+#define I40E_AQ_ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK	1
+#define I40E_AQ_ANVM_READ_SINGLE_FEATURE		0
+#define I40E_AQ_ANVM_READ_MULTIPLE_FEATURES		1
 	__le16	element_count;
-	__le16	element_id; /* Feature/field ID */
-	u8	reserved[2];
+	__le16	element_id;	/* Feature/field ID */
+	__le16	element_id_msw;	/* MSWord of field ID */
 	__le32	address_high;
 	__le32	address_low;
 };
-- 
1.9.3

^ permalink raw reply related

* [net-next v2 08/17] i40e: only enable PTP interrupt cause if PTP is enabled
From: Jeff Kirsher @ 2015-01-16 13:28 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1421414946-22179-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

We should not blindly enable the PTP interrupt flags for all PFs. We
should only enable the PTP interrupt in PFs which have enabled
PTP.

Change-ID: I051a17cae4c199a2f3cf7852266e27eda6630525
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index c3a353d..0cc0266 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2819,8 +2819,9 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
  * i40e_enable_misc_int_causes - enable the non-queue interrupts
  * @hw: ptr to the hardware info
  **/
-static void i40e_enable_misc_int_causes(struct i40e_hw *hw)
+static void i40e_enable_misc_int_causes(struct i40e_pf *pf)
 {
+	struct i40e_hw *hw = &pf->hw;
 	u32 val;
 
 	/* clear things first */
@@ -2832,11 +2833,13 @@ static void i40e_enable_misc_int_causes(struct i40e_hw *hw)
 	      I40E_PFINT_ICR0_ENA_GRST_MASK          |
 	      I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK |
 	      I40E_PFINT_ICR0_ENA_GPIO_MASK          |
-	      I40E_PFINT_ICR0_ENA_TIMESYNC_MASK      |
 	      I40E_PFINT_ICR0_ENA_HMC_ERR_MASK       |
 	      I40E_PFINT_ICR0_ENA_VFLR_MASK          |
 	      I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
 
+	if (pf->flags & I40E_FLAG_PTP)
+		val |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
+
 	wr32(hw, I40E_PFINT_ICR0_ENA, val);
 
 	/* SW_ITR_IDX = 0, but don't change INTENA */
@@ -2866,7 +2869,7 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
 	q_vector->tx.latency_range = I40E_LOW_LATENCY;
 	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
 
-	i40e_enable_misc_int_causes(hw);
+	i40e_enable_misc_int_causes(pf);
 
 	/* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */
 	wr32(hw, I40E_PFINT_LNKLST0, 0);
@@ -7137,7 +7140,7 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
 		}
 	}
 
-	i40e_enable_misc_int_causes(hw);
+	i40e_enable_misc_int_causes(pf);
 
 	/* associate no queues to the misc vector */
 	wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST);
-- 
1.9.3

^ permalink raw reply related

* [net-next v2 13/17] i40e: AQ API updates for new commands
From: Jeff Kirsher @ 2015-01-16 13:29 UTC (permalink / raw)
  To: davem
  Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, gerlitz.or,
	Kamil Krawczyk, Jeff Kirsher
In-Reply-To: <1421414946-22179-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@intel.com>

Add LLDP control commands, and add OEM OCSD and OCBB commands.

Change-ID: I89eba2bd02013d0a44e1ce900559c54bb15f4a66
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Kamil Krawczyk <kamil.krawczyk@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h  | 90 ++++++++++++++++++++--
 .../net/ethernet/intel/i40evf/i40e_adminq_cmd.h    | 46 +++++++++--
 2 files changed, 126 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 70b8e55..929e3d7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -256,6 +256,8 @@ enum i40e_admin_queue_opc {
 	i40e_aqc_opc_lldp_stop		= 0x0A05,
 	i40e_aqc_opc_lldp_start		= 0x0A06,
 	i40e_aqc_opc_get_cee_dcb_cfg	= 0x0A07,
+	i40e_aqc_opc_lldp_set_local_mib	= 0x0A08,
+	i40e_aqc_opc_lldp_stop_start_spec_agent	= 0x0A09,
 
 	/* Tunnel commands */
 	i40e_aqc_opc_add_udp_tunnel	= 0x0B00,
@@ -268,6 +270,8 @@ enum i40e_admin_queue_opc {
 	/* OEM commands */
 	i40e_aqc_opc_oem_parameter_change	= 0xFE00,
 	i40e_aqc_opc_oem_device_status_change	= 0xFE01,
+	i40e_aqc_opc_oem_ocsd_initialize	= 0xFE02,
+	i40e_aqc_opc_oem_ocbb_initialize	= 0xFE03,
 
 	/* debug commands */
 	i40e_aqc_opc_debug_get_deviceid		= 0xFF00,
@@ -454,6 +458,7 @@ struct i40e_aqc_arp_proxy_data {
 	__le32	pfpm_proxyfc;
 	__le32	ip_addr;
 	u8	mac_addr[6];
+	u8	reserved[2];
 };
 
 I40E_CHECK_STRUCT_LEN(0x14, i40e_aqc_arp_proxy_data);
@@ -1854,21 +1859,32 @@ struct i40e_aqc_nvm_config_write {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write);
 
+/* Used for 0x0704 as well as for 0x0705 commands */
+#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT		1
+#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_MASK \
+				(1 << I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT)
+#define I40E_AQ_ANVM_FEATURE		0
+#define I40E_AQ_ANVM_IMMEDIATE_FIELD	(1 << I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT)
 struct i40e_aqc_nvm_config_data_feature {
 	__le16 feature_id;
-	__le16 instance_id;
+#define I40E_AQ_ANVM_FEATURE_OPTION_OEM_ONLY		0x01
+#define I40E_AQ_ANVM_FEATURE_OPTION_DWORD_MAP		0x08
+#define I40E_AQ_ANVM_FEATURE_OPTION_POR_CSR		0x10
 	__le16 feature_options;
 	__le16 feature_selection;
 };
 
+I40E_CHECK_STRUCT_LEN(0x6, i40e_aqc_nvm_config_data_feature);
+
 struct i40e_aqc_nvm_config_data_immediate_field {
-#define ANVM_FEATURE_OR_IMMEDIATE_MASK	0x2
-	__le16 field_id;
-	__le16 instance_id;
+	__le32 field_id;
+	__le32 field_value;
 	__le16 field_options;
-	__le16 field_value;
+	__le16 reserved;
 };
 
+I40E_CHECK_STRUCT_LEN(0xc, i40e_aqc_nvm_config_data_immediate_field);
+
 /* Send to PF command (indirect 0x0801) id is only used by PF
  * Send to VF command (indirect 0x0802) id is only used by PF
  * Send to Peer PF command (indirect 0x0803)
@@ -2069,12 +2085,54 @@ struct i40e_aqc_get_cee_dcb_cfg_resp {
 	u8	oper_tc_bw[8];
 	u8	oper_pfc_en;
 	__le16	oper_app_prio;
+#define I40E_AQC_CEE_APP_FCOE_SHIFT	0x0
+#define I40E_AQC_CEE_APP_FCOE_MASK	(0x7 << I40E_AQC_CEE_APP_FCOE_SHIFT)
+#define I40E_AQC_CEE_APP_ISCSI_SHIFT	0x3
+#define I40E_AQC_CEE_APP_ISCSI_MASK	(0x7 << I40E_AQC_CEE_APP_ISCSI_SHIFT)
+#define I40E_AQC_CEE_APP_FIP_SHIFT	0x8
+#define I40E_AQC_CEE_APP_FIP_MASK	(0x7 << I40E_AQC_CEE_APP_FIP_SHIFT)
+#define I40E_AQC_CEE_APP_FIP_MASK	(0x7 << I40E_AQC_CEE_APP_FIP_SHIFT)
 	__le32	tlv_status;
+#define I40E_AQC_CEE_PG_STATUS_SHIFT	0x0
+#define I40E_AQC_CEE_PG_STATUS_MASK	(0x7 << I40E_AQC_CEE_PG_STATUS_SHIFT)
+#define I40E_AQC_CEE_PFC_STATUS_SHIFT	0x3
+#define I40E_AQC_CEE_PFC_STATUS_MASK	(0x7 << I40E_AQC_CEE_PFC_STATUS_SHIFT)
+#define I40E_AQC_CEE_APP_STATUS_SHIFT	0x8
+#define I40E_AQC_CEE_APP_STATUS_MASK	(0x7 << I40E_AQC_CEE_APP_STATUS_SHIFT)
 	u8	reserved[12];
 };
 
 I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_cee_dcb_cfg_resp);
 
+/*	Set Local LLDP MIB (indirect 0x0A08)
+ *	Used to replace the local MIB of a given LLDP agent. e.g. DCBx
+ */
+struct i40e_aqc_lldp_set_local_mib {
+#define SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT	0
+#define SET_LOCAL_MIB_AC_TYPE_DCBX_MASK	(1 << SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT)
+	u8	type;
+	u8	reserved0;
+	__le16	length;
+	u8	reserved1[4];
+	__le32	address_high;
+	__le32	address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_set_local_mib);
+
+/*	Stop/Start LLDP Agent (direct 0x0A09)
+ *	Used for stopping/starting specific LLDP agent. e.g. DCBx
+ */
+struct i40e_aqc_lldp_stop_start_specific_agent {
+#define I40E_AQC_START_SPECIFIC_AGENT_SHIFT	0
+#define I40E_AQC_START_SPECIFIC_AGENT_MASK \
+				(1 << I40E_AQC_START_SPECIFIC_AGENT_SHIFT)
+	u8	command;
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop_start_specific_agent);
+
 /* Add Udp Tunnel command and completion (direct 0x0B00) */
 struct i40e_aqc_add_udp_tunnel {
 	__le16	udp_port;
@@ -2164,6 +2222,28 @@ struct i40e_aqc_oem_state_change {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_state_change);
 
+/* Initialize OCSD (0xFE02, direct) */
+struct i40e_aqc_opc_oem_ocsd_initialize {
+	u8 type_status;
+	u8 reserved1[3];
+	__le32 ocsd_memory_block_addr_high;
+	__le32 ocsd_memory_block_addr_low;
+	__le32 requested_update_interval;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocsd_initialize);
+
+/* Initialize OCBB  (0xFE03, direct) */
+struct i40e_aqc_opc_oem_ocbb_initialize {
+	u8 type_status;
+	u8 reserved1[3];
+	__le32 ocbb_memory_block_addr_high;
+	__le32 ocbb_memory_block_addr_low;
+	u8 reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocbb_initialize);
+
 /* debug commands */
 
 /* get device id (0xFF00) uses the generic structure */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 5363cbf..e715bcc 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -268,6 +268,8 @@ enum i40e_admin_queue_opc {
 	/* OEM commands */
 	i40e_aqc_opc_oem_parameter_change	= 0xFE00,
 	i40e_aqc_opc_oem_device_status_change	= 0xFE01,
+	i40e_aqc_opc_oem_ocsd_initialize	= 0xFE02,
+	i40e_aqc_opc_oem_ocbb_initialize	= 0xFE03,
 
 	/* debug commands */
 	i40e_aqc_opc_debug_get_deviceid		= 0xFF00,
@@ -454,6 +456,7 @@ struct i40e_aqc_arp_proxy_data {
 	__le32	pfpm_proxyfc;
 	__le32	ip_addr;
 	u8	mac_addr[6];
+	u8	reserved[2];
 };
 
 I40E_CHECK_STRUCT_LEN(0x14, i40e_aqc_arp_proxy_data);
@@ -1854,21 +1857,32 @@ struct i40e_aqc_nvm_config_write {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write);
 
+/* Used for 0x0704 as well as for 0x0705 commands */
+#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT		1
+#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_MASK \
+				(1 << I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT)
+#define I40E_AQ_ANVM_FEATURE		0
+#define I40E_AQ_ANVM_IMMEDIATE_FIELD	(1 << I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT)
 struct i40e_aqc_nvm_config_data_feature {
 	__le16 feature_id;
-	__le16 instance_id;
+#define I40E_AQ_ANVM_FEATURE_OPTION_OEM_ONLY		0x01
+#define I40E_AQ_ANVM_FEATURE_OPTION_DWORD_MAP		0x08
+#define I40E_AQ_ANVM_FEATURE_OPTION_POR_CSR		0x10
 	__le16 feature_options;
 	__le16 feature_selection;
 };
 
+I40E_CHECK_STRUCT_LEN(0x6, i40e_aqc_nvm_config_data_feature);
+
 struct i40e_aqc_nvm_config_data_immediate_field {
-#define ANVM_FEATURE_OR_IMMEDIATE_MASK	0x2
-	__le16 field_id;
-	__le16 instance_id;
+	__le32 field_id;
+	__le32 field_value;
 	__le16 field_options;
-	__le16 field_value;
+	__le16 reserved;
 };
 
+I40E_CHECK_STRUCT_LEN(0xc, i40e_aqc_nvm_config_data_immediate_field);
+
 /* Send to PF command (indirect 0x0801) id is only used by PF
  * Send to VF command (indirect 0x0802) id is only used by PF
  * Send to Peer PF command (indirect 0x0803)
@@ -2140,6 +2154,28 @@ struct i40e_aqc_oem_state_change {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_state_change);
 
+/* Initialize OCSD (0xFE02, direct) */
+struct i40e_aqc_opc_oem_ocsd_initialize {
+	u8 type_status;
+	u8 reserved1[3];
+	__le32 ocsd_memory_block_addr_high;
+	__le32 ocsd_memory_block_addr_low;
+	__le32 requested_update_interval;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocsd_initialize);
+
+/* Initialize OCBB  (0xFE03, direct) */
+struct i40e_aqc_opc_oem_ocbb_initialize {
+	u8 type_status;
+	u8 reserved1[3];
+	__le32 ocbb_memory_block_addr_high;
+	__le32 ocbb_memory_block_addr_low;
+	u8 reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocbb_initialize);
+
 /* debug commands */
 
 /* get device id (0xFF00) uses the generic structure */
-- 
1.9.3

^ permalink raw reply related

* [net-next v2 11/17] i40e: add more struct size checks
From: Jeff Kirsher @ 2015-01-16 13:29 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1421414946-22179-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@intel.com>

Add struct size checks to many of the indirect structs and a few
command structs that were left out previously.

Change-ID: I7810b9af0f04e3ced670639f8671daf7df9b3f4d
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Acked-by: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h  | 43 ++++++++++++++++++++++
 .../net/ethernet/intel/i40evf/i40e_adminq_cmd.h    | 43 ++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 490dfcf..20cada5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -456,6 +456,8 @@ struct i40e_aqc_arp_proxy_data {
 	u8	mac_addr[6];
 };
 
+I40E_CHECK_STRUCT_LEN(0x14, i40e_aqc_arp_proxy_data);
+
 /* Set NS Proxy Table Entry Command (indirect 0x0105) */
 struct i40e_aqc_ns_proxy_data {
 	__le16	table_idx_mac_addr_0;
@@ -481,6 +483,8 @@ struct i40e_aqc_ns_proxy_data {
 	u8	ipv6_addr_1[16];
 };
 
+I40E_CHECK_STRUCT_LEN(0x3c, i40e_aqc_ns_proxy_data);
+
 /* Manage LAA Command (0x0106) - obsolete */
 struct i40e_aqc_mng_laa {
 	__le16	command_flags;
@@ -491,6 +495,8 @@ struct i40e_aqc_mng_laa {
 	u8	reserved2[6];
 };
 
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mng_laa);
+
 /* Manage MAC Address Read Command (indirect 0x0107) */
 struct i40e_aqc_mac_address_read {
 	__le16	command_flags;
@@ -562,6 +568,8 @@ struct i40e_aqc_get_switch_config_header_resp {
 	u8	reserved[12];
 };
 
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_config_header_resp);
+
 struct i40e_aqc_switch_config_element_resp {
 	u8	element_type;
 #define I40E_AQ_SW_ELEM_TYPE_MAC	1
@@ -587,6 +595,8 @@ struct i40e_aqc_switch_config_element_resp {
 	__le16	element_info;
 };
 
+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_config_element_resp);
+
 /* Get Switch Configuration (indirect 0x0200)
  *    an array of elements are returned in the response buffer
  *    the first in the array is the header, remainder are elements
@@ -596,6 +606,8 @@ struct i40e_aqc_get_switch_config_resp {
 	struct i40e_aqc_switch_config_element_resp	element[1];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_switch_config_resp);
+
 /* Add Statistics (direct 0x0201)
  * Remove Statistics (direct 0x0202)
  */
@@ -661,6 +673,8 @@ struct i40e_aqc_switch_resource_alloc_element_resp {
 	u8	reserved2[6];
 };
 
+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
+
 /* Add VSI (indirect 0x0210)
  *    this indirect command uses struct i40e_aqc_vsi_properties_data
  *    as the indirect buffer (128 bytes)
@@ -1092,6 +1106,8 @@ struct i40e_aqc_remove_tag {
 	u8	reserved[12];
 };
 
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_tag);
+
 /* Add multicast E-Tag (direct 0x0257)
  * del multicast E-Tag (direct 0x0258) only uses pv_seid and etag fields
  * and no external data
@@ -1359,6 +1375,8 @@ struct i40e_aqc_configure_vsi_ets_sla_bw_data {
 	u8	reserved1[28];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_configure_vsi_ets_sla_bw_data);
+
 /* Configure VSI Bandwidth Allocation per Traffic Type (indirect 0x0407)
  *    responds with i40e_aqc_qs_handles_resp
  */
@@ -1370,6 +1388,8 @@ struct i40e_aqc_configure_vsi_tc_bw_data {
 	__le16	qs_handles[8];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_vsi_tc_bw_data);
+
 /* Query vsi bw configuration (indirect 0x0408) */
 struct i40e_aqc_query_vsi_bw_config_resp {
 	u8	tc_valid_bits;
@@ -1383,6 +1403,8 @@ struct i40e_aqc_query_vsi_bw_config_resp {
 	u8	reserved3[23];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_vsi_bw_config_resp);
+
 /* Query VSI Bandwidth Allocation per Traffic Type (indirect 0x040A) */
 struct i40e_aqc_query_vsi_ets_sla_config_resp {
 	u8	tc_valid_bits;
@@ -1394,6 +1416,8 @@ struct i40e_aqc_query_vsi_ets_sla_config_resp {
 	__le16	tc_bw_max[2];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_vsi_ets_sla_config_resp);
+
 /* Configure Switching Component Bandwidth Limit (direct 0x0410) */
 struct i40e_aqc_configure_switching_comp_bw_limit {
 	__le16	seid;
@@ -1421,6 +1445,8 @@ struct i40e_aqc_configure_switching_comp_ets_data {
 	u8	reserved2[96];
 };
 
+I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_configure_switching_comp_ets_data);
+
 /* Configure Switching Component Bandwidth Limits per Tc (indirect 0x0416) */
 struct i40e_aqc_configure_switching_comp_ets_bw_limit_data {
 	u8	tc_valid_bits;
@@ -1432,6 +1458,9 @@ struct i40e_aqc_configure_switching_comp_ets_bw_limit_data {
 	u8	reserved1[28];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40,
+		      i40e_aqc_configure_switching_comp_ets_bw_limit_data);
+
 /* Configure Switching Component Bandwidth Allocation per Tc
  * (indirect 0x0417)
  */
@@ -1443,6 +1472,8 @@ struct i40e_aqc_configure_switching_comp_bw_config_data {
 	u8	reserved1[20];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_switching_comp_bw_config_data);
+
 /* Query Switching Component Configuration (indirect 0x0418) */
 struct i40e_aqc_query_switching_comp_ets_config_resp {
 	u8	tc_valid_bits;
@@ -1453,6 +1484,8 @@ struct i40e_aqc_query_switching_comp_ets_config_resp {
 	u8	reserved2[23];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_switching_comp_ets_config_resp);
+
 /* Query PhysicalPort ETS Configuration (indirect 0x0419) */
 struct i40e_aqc_query_port_ets_config_resp {
 	u8	reserved[4];
@@ -1468,6 +1501,8 @@ struct i40e_aqc_query_port_ets_config_resp {
 	u8	reserved3[32];
 };
 
+I40E_CHECK_STRUCT_LEN(0x44, i40e_aqc_query_port_ets_config_resp);
+
 /* Query Switching Component Bandwidth Allocation per Traffic Type
  * (indirect 0x041A)
  */
@@ -1482,6 +1517,8 @@ struct i40e_aqc_query_switching_comp_bw_config_resp {
 	__le16	tc_bw_max[2];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_switching_comp_bw_config_resp);
+
 /* Suspend/resume port TX traffic
  * (direct 0x041B and 0x041C) uses the generic SEID struct
  */
@@ -1495,6 +1532,8 @@ struct i40e_aqc_configure_partition_bw_data {
 	u8	max_bw[16];      /* bandwidth limit */
 };
 
+I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
+
 /* Get and set the active HMC resource profile and status.
  * (direct 0x0500) and (direct 0x0501)
  */
@@ -1577,6 +1616,8 @@ struct i40e_aqc_module_desc {
 	u8 reserved2[8];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_module_desc);
+
 struct i40e_aq_get_phy_abilities_resp {
 	__le32	phy_type;       /* bitmap using the above enum for offsets */
 	u8	link_speed;     /* bitmap using the above enum bit patterns */
@@ -1605,6 +1646,8 @@ struct i40e_aq_get_phy_abilities_resp {
 	struct i40e_aqc_module_desc	qualified_module[I40E_AQ_PHY_MAX_QMS];
 };
 
+I40E_CHECK_STRUCT_LEN(0x218, i40e_aq_get_phy_abilities_resp);
+
 /* Set PHY Config (direct 0x0601) */
 struct i40e_aq_set_phy_config { /* same bits as above in all */
 	__le32	phy_type;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index a20b2b0..1b80846 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -456,6 +456,8 @@ struct i40e_aqc_arp_proxy_data {
 	u8	mac_addr[6];
 };
 
+I40E_CHECK_STRUCT_LEN(0x14, i40e_aqc_arp_proxy_data);
+
 /* Set NS Proxy Table Entry Command (indirect 0x0105) */
 struct i40e_aqc_ns_proxy_data {
 	__le16	table_idx_mac_addr_0;
@@ -481,6 +483,8 @@ struct i40e_aqc_ns_proxy_data {
 	u8	ipv6_addr_1[16];
 };
 
+I40E_CHECK_STRUCT_LEN(0x3c, i40e_aqc_ns_proxy_data);
+
 /* Manage LAA Command (0x0106) - obsolete */
 struct i40e_aqc_mng_laa {
 	__le16	command_flags;
@@ -491,6 +495,8 @@ struct i40e_aqc_mng_laa {
 	u8	reserved2[6];
 };
 
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mng_laa);
+
 /* Manage MAC Address Read Command (indirect 0x0107) */
 struct i40e_aqc_mac_address_read {
 	__le16	command_flags;
@@ -562,6 +568,8 @@ struct i40e_aqc_get_switch_config_header_resp {
 	u8	reserved[12];
 };
 
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_config_header_resp);
+
 struct i40e_aqc_switch_config_element_resp {
 	u8	element_type;
 #define I40E_AQ_SW_ELEM_TYPE_MAC	1
@@ -587,6 +595,8 @@ struct i40e_aqc_switch_config_element_resp {
 	__le16	element_info;
 };
 
+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_config_element_resp);
+
 /* Get Switch Configuration (indirect 0x0200)
  *    an array of elements are returned in the response buffer
  *    the first in the array is the header, remainder are elements
@@ -596,6 +606,8 @@ struct i40e_aqc_get_switch_config_resp {
 	struct i40e_aqc_switch_config_element_resp	element[1];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_switch_config_resp);
+
 /* Add Statistics (direct 0x0201)
  * Remove Statistics (direct 0x0202)
  */
@@ -661,6 +673,8 @@ struct i40e_aqc_switch_resource_alloc_element_resp {
 	u8	reserved2[6];
 };
 
+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
+
 /* Add VSI (indirect 0x0210)
  *    this indirect command uses struct i40e_aqc_vsi_properties_data
  *    as the indirect buffer (128 bytes)
@@ -1092,6 +1106,8 @@ struct i40e_aqc_remove_tag {
 	u8	reserved[12];
 };
 
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_tag);
+
 /* Add multicast E-Tag (direct 0x0257)
  * del multicast E-Tag (direct 0x0258) only uses pv_seid and etag fields
  * and no external data
@@ -1359,6 +1375,8 @@ struct i40e_aqc_configure_vsi_ets_sla_bw_data {
 	u8	reserved1[28];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_configure_vsi_ets_sla_bw_data);
+
 /* Configure VSI Bandwidth Allocation per Traffic Type (indirect 0x0407)
  *    responds with i40e_aqc_qs_handles_resp
  */
@@ -1370,6 +1388,8 @@ struct i40e_aqc_configure_vsi_tc_bw_data {
 	__le16	qs_handles[8];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_vsi_tc_bw_data);
+
 /* Query vsi bw configuration (indirect 0x0408) */
 struct i40e_aqc_query_vsi_bw_config_resp {
 	u8	tc_valid_bits;
@@ -1383,6 +1403,8 @@ struct i40e_aqc_query_vsi_bw_config_resp {
 	u8	reserved3[23];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_vsi_bw_config_resp);
+
 /* Query VSI Bandwidth Allocation per Traffic Type (indirect 0x040A) */
 struct i40e_aqc_query_vsi_ets_sla_config_resp {
 	u8	tc_valid_bits;
@@ -1394,6 +1416,8 @@ struct i40e_aqc_query_vsi_ets_sla_config_resp {
 	__le16	tc_bw_max[2];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_vsi_ets_sla_config_resp);
+
 /* Configure Switching Component Bandwidth Limit (direct 0x0410) */
 struct i40e_aqc_configure_switching_comp_bw_limit {
 	__le16	seid;
@@ -1421,6 +1445,8 @@ struct i40e_aqc_configure_switching_comp_ets_data {
 	u8	reserved2[96];
 };
 
+I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_configure_switching_comp_ets_data);
+
 /* Configure Switching Component Bandwidth Limits per Tc (indirect 0x0416) */
 struct i40e_aqc_configure_switching_comp_ets_bw_limit_data {
 	u8	tc_valid_bits;
@@ -1432,6 +1458,9 @@ struct i40e_aqc_configure_switching_comp_ets_bw_limit_data {
 	u8	reserved1[28];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40,
+		      i40e_aqc_configure_switching_comp_ets_bw_limit_data);
+
 /* Configure Switching Component Bandwidth Allocation per Tc
  * (indirect 0x0417)
  */
@@ -1443,6 +1472,8 @@ struct i40e_aqc_configure_switching_comp_bw_config_data {
 	u8	reserved1[20];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_switching_comp_bw_config_data);
+
 /* Query Switching Component Configuration (indirect 0x0418) */
 struct i40e_aqc_query_switching_comp_ets_config_resp {
 	u8	tc_valid_bits;
@@ -1453,6 +1484,8 @@ struct i40e_aqc_query_switching_comp_ets_config_resp {
 	u8	reserved2[23];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_switching_comp_ets_config_resp);
+
 /* Query PhysicalPort ETS Configuration (indirect 0x0419) */
 struct i40e_aqc_query_port_ets_config_resp {
 	u8	reserved[4];
@@ -1468,6 +1501,8 @@ struct i40e_aqc_query_port_ets_config_resp {
 	u8	reserved3[32];
 };
 
+I40E_CHECK_STRUCT_LEN(0x44, i40e_aqc_query_port_ets_config_resp);
+
 /* Query Switching Component Bandwidth Allocation per Traffic Type
  * (indirect 0x041A)
  */
@@ -1482,6 +1517,8 @@ struct i40e_aqc_query_switching_comp_bw_config_resp {
 	__le16	tc_bw_max[2];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_switching_comp_bw_config_resp);
+
 /* Suspend/resume port TX traffic
  * (direct 0x041B and 0x041C) uses the generic SEID struct
  */
@@ -1495,6 +1532,8 @@ struct i40e_aqc_configure_partition_bw_data {
 	u8	max_bw[16];      /* bandwidth limit */
 };
 
+I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
+
 /* Get and set the active HMC resource profile and status.
  * (direct 0x0500) and (direct 0x0501)
  */
@@ -1577,6 +1616,8 @@ struct i40e_aqc_module_desc {
 	u8 reserved2[8];
 };
 
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_module_desc);
+
 struct i40e_aq_get_phy_abilities_resp {
 	__le32	phy_type;       /* bitmap using the above enum for offsets */
 	u8	link_speed;     /* bitmap using the above enum bit patterns */
@@ -1605,6 +1646,8 @@ struct i40e_aq_get_phy_abilities_resp {
 	struct i40e_aqc_module_desc	qualified_module[I40E_AQ_PHY_MAX_QMS];
 };
 
+I40E_CHECK_STRUCT_LEN(0x218, i40e_aq_get_phy_abilities_resp);
+
 /* Set PHY Config (direct 0x0601) */
 struct i40e_aq_set_phy_config { /* same bits as above in all */
 	__le32	phy_type;
-- 
1.9.3

^ permalink raw reply related

* [net-next v2 09/17] i40e: check I40E_FLAG_PTP before handling Tx or Rx timestamps
From: Jeff Kirsher @ 2015-01-16 13:28 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1421414946-22179-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

We should not be doing Tx or Rx timestamping if we do not have PTP
enabled. Add checks to ensure that we don't attempt to run any
PTP-related timestamping code if we have not enabled PTP on that PF.

Change-ID: I4335942ae2d5c5f91abfdbeeea02bcace49e7677
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ptp.c  | 9 ++++++++-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 3 +++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 6d1ec92..a152878 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -305,6 +305,13 @@ void i40e_ptp_tx_hwtstamp(struct i40e_pf *pf)
 	u32 hi, lo;
 	u64 ns;
 
+	if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx)
+		return;
+
+	/* don't attempt to timestamp if we don't have an skb */
+	if (!pf->ptp_tx_skb)
+		return;
+
 	lo = rd32(hw, I40E_PRTTSYN_TXTIME_L);
 	hi = rd32(hw, I40E_PRTTSYN_TXTIME_H);
 
@@ -338,7 +345,7 @@ void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index)
 	/* Since we cannot turn off the Rx timestamp logic if the device is
 	 * doing Tx timestamping, check if Rx timestamping is configured.
 	 */
-	if (!pf->ptp_rx)
+	if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx)
 		return;
 
 	hw = &pf->hw;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index bb86390..420d662 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1939,6 +1939,9 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	 * we are not already transmitting a packet to be timestamped
 	 */
 	pf = i40e_netdev_to_pf(tx_ring->netdev);
+	if (!(pf->flags & I40E_FLAG_PTP))
+		return 0;
+
 	if (pf->ptp_tx &&
 	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-- 
1.9.3

^ permalink raw reply related


This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.