Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH 01/14] add Documentation/namespaces/user_namespace.txt
From: Randy Dunlap @ 2011-07-27 16:02 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: linux-kernel, dhowells, ebiederm, containers, netdev, akpm,
	Serge E. Hallyn
In-Reply-To: <20110727153848.GA17288@hallyn.com>

On Wed, 27 Jul 2011 15:38:48 +0000 Serge E. Hallyn wrote:

> > > +Working notes
> > > +=============
> > 
> > A lot of this file is working notes and will need to be updated...
> 
> Yup.  I can leave it out of this file and keep it on the wiki instead, if
> that is preferred.

Either place is OK with me, as long as you continue to update it
and don't let it go stale.

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply

* [patch 11/11] [PATCH] qeth: add support for af_iucv HiperSockets transport
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Einar Lueck, Ursula Braun
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: netnext_qeth_iucv.patch --]
[-- Type: text/plain, Size: 29304 bytes --]

From: Frank Blaschka <frank.blaschka@de.ibm.com>

This patch extends the HiperSockets device driver to send and receive
af_iucv traffic over HiperSockets transport.
TX: Driver uses new asynchronous delivery of storage blocks to pass
    flow control/congestion information from the HiperSockets microcode
    to the af_iucv socket.
RX: Memory for incoming traffic is preallocated and passed to 
    HiperSockets layer. If receiver is not capable to clean its buffers
    shared with HiperSockets and pass new memory to the HiperSockets
    layer this will cause flow control/congestion events on the
    sender.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Signed-off-by: Einar Lueck <elelueck@de.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
---
 drivers/s390/net/qeth_core.h      |   19 ++-
 drivers/s390/net/qeth_core_main.c |  230 +++++++++++++++++++++++++++++++-------
 drivers/s390/net/qeth_l2_main.c   |    2 
 drivers/s390/net/qeth_l3.h        |    4 
 drivers/s390/net/qeth_l3_main.c   |   87 +++++++++++---
 drivers/s390/net/qeth_l3_sys.c    |  110 ++++++++++++++++++
 6 files changed, 391 insertions(+), 61 deletions(-)

--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -217,6 +217,7 @@ static inline int qeth_is_ipa_enabled(st
  */
 #define QETH_TX_TIMEOUT		100 * HZ
 #define QETH_RCD_TIMEOUT	60 * HZ
+#define QETH_RECLAIM_WORK_TIME	HZ
 #define QETH_HEADER_SIZE	32
 #define QETH_MAX_PORTNO		15
 
@@ -265,6 +266,7 @@ static inline int qeth_is_ipa_enabled(st
 
 /* large receive scatter gather copy break */
 #define QETH_RX_SG_CB (PAGE_SIZE >> 1)
+#define QETH_RX_PULL_LEN 256
 
 struct qeth_hdr_layer3 {
 	__u8  id;
@@ -382,6 +384,16 @@ enum qeth_qdio_buffer_states {
 	QETH_QDIO_BUF_PRIMED,
 	/*
 	 * inbound: not applicable
+	 * outbound: identified to be pending in TPQ
+	 */
+	QETH_QDIO_BUF_PENDING,
+	/*
+	 * inbound: not applicable
+	 * outbound: found in completion queue
+	 */
+	QETH_QDIO_BUF_IN_CQ,
+	/*
+	 * inbound: not applicable
 	 * outbound: handled via transfer pending / completion queue
 	 */
 	QETH_QDIO_BUF_HANDLED_DELAYED,
@@ -409,6 +421,7 @@ struct qeth_qdio_buffer {
 	struct qdio_buffer *buffer;
 	/* the buffer pool entry currently associated to this buffer */
 	struct qeth_buffer_pool_entry *pool_entry;
+	struct sk_buff *rx_skb;
 };
 
 struct qeth_qdio_q {
@@ -674,6 +687,7 @@ struct qeth_card_options {
 	enum qeth_ipa_isolation_modes isolation;
 	int sniffer;
 	enum qeth_cq cq;
+	char hsuid[9];
 };
 
 /*
@@ -771,6 +785,8 @@ struct qeth_card {
 	struct mutex discipline_mutex;
 	struct napi_struct napi;
 	struct qeth_rx rx;
+	struct delayed_work buffer_reclaim_work;
+	int reclaim_index;
 };
 
 struct qeth_card_list_struct {
@@ -836,6 +852,7 @@ int qeth_core_create_device_attributes(s
 void qeth_core_remove_device_attributes(struct device *);
 int qeth_core_create_osn_attributes(struct device *);
 void qeth_core_remove_osn_attributes(struct device *);
+void qeth_buffer_reclaim_work(struct work_struct *);
 
 /* exports for qeth discipline device drivers */
 extern struct qeth_card_list_struct qeth_core_card_list;
@@ -864,7 +881,7 @@ int qeth_check_qdio_errors(struct qeth_c
 		unsigned int, const char *);
 void qeth_queue_input_buffer(struct qeth_card *, int);
 struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
-		struct qdio_buffer *, struct qdio_buffer_element **, int *,
+		struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
 		struct qeth_hdr **);
 void qeth_schedule_recovery(struct qeth_card *);
 void qeth_qdio_start_poll(struct ccw_device *, int, unsigned long);
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -21,6 +21,7 @@
 #include <linux/mii.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <net/iucv/af_iucv.h>
 
 #include <asm/ebcdic.h>
 #include <asm/io.h>
@@ -58,6 +59,10 @@ static void qeth_setup_ccw(struct qeth_c
 static void qeth_free_buffer_pool(struct qeth_card *);
 static int qeth_qdio_establish(struct qeth_card *);
 static void qeth_free_qdio_buffers(struct qeth_card *);
+static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
+		struct qeth_qdio_out_buffer *buf,
+		enum iucv_tx_notify notification);
+static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf);
 static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
 		struct qeth_qdio_out_buffer *buf,
 		enum qeth_qdio_buffer_states newbufstate);
@@ -204,7 +209,7 @@ static int qeth_alloc_buffer_pool(struct
 
 	QETH_CARD_TEXT(card, 5, "alocpool");
 	for (i = 0; i < card->qdio.init_pool.buf_count; ++i) {
-		pool_entry = kmalloc(sizeof(*pool_entry), GFP_KERNEL);
+		pool_entry = kzalloc(sizeof(*pool_entry), GFP_KERNEL);
 		if (!pool_entry) {
 			qeth_free_buffer_pool(card);
 			return -ENOMEM;
@@ -331,6 +336,30 @@ static inline void qeth_free_cq(struct q
 	card->qdio.out_bufstates = NULL;
 }
 
+static inline enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
+	int delayed) {
+	enum iucv_tx_notify n;
+
+	switch (sbalf15) {
+	case 0:
+		n = delayed ? TX_NOTIFY_DELAYED_OK : TX_NOTIFY_OK;
+		break;
+	case 4:
+	case 16:
+	case 17:
+	case 18:
+		n = delayed ? TX_NOTIFY_DELAYED_UNREACHABLE :
+			TX_NOTIFY_UNREACHABLE;
+		break;
+	default:
+		n = delayed ? TX_NOTIFY_DELAYED_GENERALERROR :
+			TX_NOTIFY_GENERALERROR;
+		break;
+	}
+
+	return n;
+}
+
 static inline void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q,
 	int bidx, int forced_cleanup)
 {
@@ -345,6 +374,11 @@ static inline void qeth_cleanup_handled_
 				struct qeth_qdio_out_buffer *f = c;
 				QETH_CARD_TEXT(f->q->card, 5, "fp");
 				QETH_CARD_TEXT_(f->q->card, 5, "%lx", (long) f);
+				/* release here to avoid interleaving between
+				   outbound tasklet and inbound tasklet
+				   regarding notifications and lifecycle */
+				qeth_release_skbs(c);
+
 				c = f->next_pending;
 				BUG_ON(head->next_pending != f);
 				head->next_pending = c;
@@ -363,6 +397,7 @@ static inline void qeth_qdio_handle_aob(
 		unsigned long phys_aob_addr) {
 	struct qaob *aob;
 	struct qeth_qdio_out_buffer *buffer;
+	enum iucv_tx_notify notification;
 
 	aob = (struct qaob *) phys_to_virt(phys_aob_addr);
 	QETH_CARD_TEXT(card, 5, "haob");
@@ -372,6 +407,22 @@ static inline void qeth_qdio_handle_aob(
 
 	BUG_ON(buffer == NULL);
 
+	if (atomic_cmpxchg(&buffer->state, QETH_QDIO_BUF_PRIMED,
+			   QETH_QDIO_BUF_IN_CQ) == QETH_QDIO_BUF_PRIMED) {
+		notification = TX_NOTIFY_OK;
+	} else {
+		BUG_ON(atomic_read(&buffer->state) != QETH_QDIO_BUF_PENDING);
+
+		atomic_set(&buffer->state, QETH_QDIO_BUF_IN_CQ);
+		notification = TX_NOTIFY_DELAYED_OK;
+	}
+
+	if (aob->aorc != 0)  {
+		QETH_CARD_TEXT_(card, 2, "aorc%02X", aob->aorc);
+		notification = qeth_compute_cq_notification(aob->aorc, 1);
+	}
+	qeth_notify_skbs(buffer->q, buffer, notification);
+
 	buffer->aob = NULL;
 	qeth_clear_output_buffer(buffer->q, buffer,
 				QETH_QDIO_BUF_HANDLED_DELAYED);
@@ -738,7 +789,7 @@ static int qeth_setup_channel(struct qet
 	QETH_DBF_TEXT(SETUP, 2, "setupch");
 	for (cnt = 0; cnt < QETH_CMD_BUFFER_NO; cnt++) {
 		channel->iob[cnt].data =
-			kmalloc(QETH_BUFSIZE, GFP_DMA|GFP_KERNEL);
+			kzalloc(QETH_BUFSIZE, GFP_DMA|GFP_KERNEL);
 		if (channel->iob[cnt].data == NULL)
 			break;
 		channel->iob[cnt].state = BUF_STATE_FREE;
@@ -1033,23 +1084,60 @@ out:
 	return;
 }
 
-static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
+static void qeth_notify_skbs(struct qeth_qdio_out_q *q,
 		struct qeth_qdio_out_buffer *buf,
-		enum qeth_qdio_buffer_states newbufstate)
+		enum iucv_tx_notify notification)
 {
-	int i;
 	struct sk_buff *skb;
 
-	/* is PCI flag set on buffer? */
-	if (buf->buffer->element[0].sflags & SBAL_SFLAGS0_PCI_REQ)
-		atomic_dec(&queue->set_pci_flags_count);
+	if (skb_queue_empty(&buf->skb_list))
+		goto out;
+	skb = skb_peek(&buf->skb_list);
+	while (skb) {
+		QETH_CARD_TEXT_(q->card, 5, "skbn%d", notification);
+		QETH_CARD_TEXT_(q->card, 5, "%lx", (long) skb);
+		if (skb->protocol == ETH_P_AF_IUCV) {
+			if (skb->sk) {
+				struct iucv_sock *iucv = iucv_sk(skb->sk);
+				iucv->sk_txnotify(skb, notification);
+			}
+		}
+		if (skb_queue_is_last(&buf->skb_list, skb))
+			skb = NULL;
+		else
+			skb = skb_queue_next(&buf->skb_list, skb);
+	}
+out:
+	return;
+}
+
+static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf)
+{
+	struct sk_buff *skb;
 
 	skb = skb_dequeue(&buf->skb_list);
 	while (skb) {
+		QETH_CARD_TEXT(buf->q->card, 5, "skbr");
+		QETH_CARD_TEXT_(buf->q->card, 5, "%lx", (long) skb);
 		atomic_dec(&skb->users);
 		dev_kfree_skb_any(skb);
 		skb = skb_dequeue(&buf->skb_list);
 	}
+}
+
+static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
+		struct qeth_qdio_out_buffer *buf,
+		enum qeth_qdio_buffer_states newbufstate)
+{
+	int i;
+
+	/* is PCI flag set on buffer? */
+	if (buf->buffer->element[0].sflags & SBAL_SFLAGS0_PCI_REQ)
+		atomic_dec(&queue->set_pci_flags_count);
+
+	if (newbufstate == QETH_QDIO_BUF_EMPTY) {
+		qeth_release_skbs(buf);
+	}
 	for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(queue->card); ++i) {
 		if (buf->buffer->element[i].addr && buf->is_header[i])
 			kmem_cache_free(qeth_core_header_cache,
@@ -1111,14 +1199,16 @@ static void qeth_free_buffer_pool(struct
 
 static void qeth_free_qdio_buffers(struct qeth_card *card)
 {
-	int i;
+	int i, j;
 
 	if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) ==
 		QETH_QDIO_UNINITIALIZED)
 		return;
 
 	qeth_free_cq(card);
-
+	cancel_delayed_work_sync(&card->buffer_reclaim_work);
+	for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j)
+		kfree_skb(card->qdio.in_q->bufs[j].rx_skb);
 	kfree(card->qdio.in_q);
 	card->qdio.in_q = NULL;
 	/* inbound buffer pool */
@@ -1287,6 +1377,7 @@ static int qeth_setup_card(struct qeth_c
 	card->ipato.invert6 = 0;
 	/* init QDIO stuff */
 	qeth_init_qdio_info(card);
+	INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work);
 	return 0;
 }
 
@@ -1308,7 +1399,7 @@ static struct qeth_card *qeth_alloc_card
 	if (!card)
 		goto out;
 	QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
-	card->ip_tbd_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+	card->ip_tbd_list = kzalloc(sizeof(struct list_head), GFP_KERNEL);
 	if (!card->ip_tbd_list) {
 		QETH_DBF_TEXT(SETUP, 0, "iptbdnom");
 		goto out_card;
@@ -2237,7 +2328,7 @@ static int qeth_alloc_qdio_buffers(struc
 		QETH_QDIO_ALLOCATED) != QETH_QDIO_UNINITIALIZED)
 		return 0;
 
-	card->qdio.in_q = kmalloc(sizeof(struct qeth_qdio_q),
+	card->qdio.in_q = kzalloc(sizeof(struct qeth_qdio_q),
 				   GFP_KERNEL);
 	if (!card->qdio.in_q)
 		goto out_nomem;
@@ -2245,27 +2336,28 @@ static int qeth_alloc_qdio_buffers(struc
 	QETH_DBF_HEX(SETUP, 2, &card->qdio.in_q, sizeof(void *));
 	memset(card->qdio.in_q, 0, sizeof(struct qeth_qdio_q));
 	/* give inbound qeth_qdio_buffers their qdio_buffers */
-	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i)
+	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
 		card->qdio.in_q->bufs[i].buffer =
 			&card->qdio.in_q->qdio_bufs[i];
+		card->qdio.in_q->bufs[i].rx_skb = NULL;
+	}
 	/* inbound buffer pool */
 	if (qeth_alloc_buffer_pool(card))
 		goto out_freeinq;
 
 	/* outbound */
 	card->qdio.out_qs =
-		kmalloc(card->qdio.no_out_queues *
+		kzalloc(card->qdio.no_out_queues *
 			sizeof(struct qeth_qdio_out_q *), GFP_KERNEL);
 	if (!card->qdio.out_qs)
 		goto out_freepool;
 	for (i = 0; i < card->qdio.no_out_queues; ++i) {
-		card->qdio.out_qs[i] = kmalloc(sizeof(struct qeth_qdio_out_q),
+		card->qdio.out_qs[i] = kzalloc(sizeof(struct qeth_qdio_out_q),
 					       GFP_KERNEL);
 		if (!card->qdio.out_qs[i])
 			goto out_freeoutq;
 		QETH_DBF_TEXT_(SETUP, 2, "outq %i", i);
 		QETH_DBF_HEX(SETUP, 2, &card->qdio.out_qs[i], sizeof(void *));
-		memset(card->qdio.out_qs[i], 0, sizeof(struct qeth_qdio_out_q));
 		card->qdio.out_qs[i]->queue_no = i;
 		/* give outbound qeth_qdio_buffers their qdio_buffers */
 		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
@@ -2563,6 +2655,12 @@ static int qeth_init_input_buffer(struct
 	struct qeth_buffer_pool_entry *pool_entry;
 	int i;
 
+	if ((card->options.cq == QETH_CQ_ENABLED) && (!buf->rx_skb)) {
+		buf->rx_skb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
+		if (!buf->rx_skb)
+			return 1;
+	}
+
 	pool_entry = qeth_find_free_buffer_pool_entry(card);
 	if (!pool_entry)
 		return 1;
@@ -2952,9 +3050,19 @@ int qeth_check_qdio_errors(struct qeth_c
 }
 EXPORT_SYMBOL_GPL(qeth_check_qdio_errors);
 
+void qeth_buffer_reclaim_work(struct work_struct *work)
+{
+	struct qeth_card *card = container_of(work, struct qeth_card,
+		buffer_reclaim_work.work);
+
+	QETH_CARD_TEXT_(card, 2, "brw:%x", card->reclaim_index);
+	qeth_queue_input_buffer(card, card->reclaim_index);
+}
+
 void qeth_queue_input_buffer(struct qeth_card *card, int index)
 {
 	struct qeth_qdio_q *queue = card->qdio.in_q;
+	struct list_head *lh;
 	int count;
 	int i;
 	int rc;
@@ -2986,6 +3094,20 @@ void qeth_queue_input_buffer(struct qeth
 			atomic_add_unless(&card->force_alloc_skb, -1, 0);
 		}
 
+		if (!count) {
+			i = 0;
+			list_for_each(lh, &card->qdio.in_buf_pool.entry_list)
+				i++;
+			if (i == card->qdio.in_buf_pool.buf_count) {
+				QETH_CARD_TEXT(card, 2, "qsarbw");
+				card->reclaim_index = index;
+				schedule_delayed_work(
+					&card->buffer_reclaim_work,
+					QETH_RECLAIM_WORK_TIME);
+			}
+			return;
+		}
+
 		/*
 		 * according to old code it should be avoided to requeue all
 		 * 128 buffers in order to benefit from PCI avoidance.
@@ -3394,15 +3516,34 @@ void qeth_qdio_output_handler(struct ccw
 		if (queue->bufstates &&
 		    (queue->bufstates[bidx].flags &
 		     QDIO_OUTBUF_STATE_FLAG_PENDING) != 0) {
+			BUG_ON(card->options.cq != QETH_CQ_ENABLED);
+
+			if (atomic_cmpxchg(&buffer->state,
+					   QETH_QDIO_BUF_PRIMED,
+					   QETH_QDIO_BUF_PENDING) ==
+				QETH_QDIO_BUF_PRIMED) {
+				qeth_notify_skbs(queue, buffer,
+						 TX_NOTIFY_PENDING);
+			}
 			buffer->aob = queue->bufstates[bidx].aob;
 			QETH_CARD_TEXT_(queue->card, 5, "pel%d", bidx);
-			QETH_CARD_TEXT_(queue->card, 5, "aob");
+			QETH_CARD_TEXT(queue->card, 5, "aob");
 			QETH_CARD_TEXT_(queue->card, 5, "%lx",
 					virt_to_phys(buffer->aob));
 			BUG_ON(bidx < 0 || bidx >= QDIO_MAX_BUFFERS_PER_Q);
-			if (qeth_init_qdio_out_buf(queue, bidx))
+			if (qeth_init_qdio_out_buf(queue, bidx)) {
+				QETH_CARD_TEXT(card, 2, "outofbuf");
 				qeth_schedule_recovery(card);
+			}
 		} else {
+			if (card->options.cq == QETH_CQ_ENABLED) {
+				enum iucv_tx_notify n;
+
+				n = qeth_compute_cq_notification(
+					buffer->buffer->element[15].sflags, 0);
+				qeth_notify_skbs(queue, buffer, n);
+			}
+
 			qeth_clear_output_buffer(queue, buffer,
 						QETH_QDIO_BUF_EMPTY);
 		}
@@ -4333,7 +4474,7 @@ static int qeth_qdio_establish(struct qe
 	qeth_create_qib_param_field(card, qib_param_field);
 	qeth_create_qib_param_field_blkt(card, qib_param_field);
 
-	in_sbal_ptrs = kmalloc(card->qdio.no_in_queues *
+	in_sbal_ptrs = kzalloc(card->qdio.no_in_queues *
 			       QDIO_MAX_BUFFERS_PER_Q * sizeof(void *),
 			       GFP_KERNEL);
 	if (!in_sbal_ptrs) {
@@ -4357,7 +4498,7 @@ static int qeth_qdio_establish(struct qe
 	qeth_qdio_establish_cq(card, in_sbal_ptrs, queue_start_poll);
 
 	out_sbal_ptrs =
-		kmalloc(card->qdio.no_out_queues * QDIO_MAX_BUFFERS_PER_Q *
+		kzalloc(card->qdio.no_out_queues * QDIO_MAX_BUFFERS_PER_Q *
 			sizeof(void *), GFP_KERNEL);
 	if (!out_sbal_ptrs) {
 		rc = -ENOMEM;
@@ -4555,29 +4696,36 @@ out:
 }
 EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
 
-static inline int qeth_create_skb_frag(struct qdio_buffer_element *element,
+static inline int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer,
+		struct qdio_buffer_element *element,
 		struct sk_buff **pskb, int offset, int *pfrag, int data_len)
 {
 	struct page *page = virt_to_page(element->addr);
 	if (*pskb == NULL) {
-		/* the upper protocol layers assume that there is data in the
-		 * skb itself. Copy a small amount (64 bytes) to make them
-		 * happy. */
-		*pskb = dev_alloc_skb(64 + ETH_HLEN);
-		if (!(*pskb))
-			return -ENOMEM;
+		if (qethbuffer->rx_skb) {
+			/* only if qeth_card.options.cq == QETH_CQ_ENABLED */
+			*pskb = qethbuffer->rx_skb;
+			qethbuffer->rx_skb = NULL;
+		} else {
+			*pskb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
+			if (!(*pskb))
+				return -ENOMEM;
+		}
+
 		skb_reserve(*pskb, ETH_HLEN);
-		if (data_len <= 64) {
+		if (data_len <= QETH_RX_PULL_LEN) {
 			memcpy(skb_put(*pskb, data_len), element->addr + offset,
 				data_len);
 		} else {
 			get_page(page);
-			memcpy(skb_put(*pskb, 64), element->addr + offset, 64);
-			skb_fill_page_desc(*pskb, *pfrag, page, offset + 64,
-				data_len - 64);
-			(*pskb)->data_len += data_len - 64;
-			(*pskb)->len      += data_len - 64;
-			(*pskb)->truesize += data_len - 64;
+			memcpy(skb_put(*pskb, QETH_RX_PULL_LEN),
+			       element->addr + offset, QETH_RX_PULL_LEN);
+			skb_fill_page_desc(*pskb, *pfrag, page,
+				offset + QETH_RX_PULL_LEN,
+				data_len - QETH_RX_PULL_LEN);
+			(*pskb)->data_len += data_len - QETH_RX_PULL_LEN;
+			(*pskb)->len      += data_len - QETH_RX_PULL_LEN;
+			(*pskb)->truesize += data_len - QETH_RX_PULL_LEN;
 			(*pfrag)++;
 		}
 	} else {
@@ -4594,11 +4742,12 @@ static inline int qeth_create_skb_frag(s
 }
 
 struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
-		struct qdio_buffer *buffer,
+		struct qeth_qdio_buffer *qethbuffer,
 		struct qdio_buffer_element **__element, int *__offset,
 		struct qeth_hdr **hdr)
 {
 	struct qdio_buffer_element *element = *__element;
+	struct qdio_buffer *buffer = qethbuffer->buffer;
 	int offset = *__offset;
 	struct sk_buff *skb = NULL;
 	int skb_len = 0;
@@ -4643,9 +4792,10 @@ struct sk_buff *qeth_core_get_next_skb(s
 	if (!skb_len)
 		return NULL;
 
-	if ((skb_len >= card->options.rx_sg_cb) &&
-	    (!(card->info.type == QETH_CARD_TYPE_OSN)) &&
-	    (!atomic_read(&card->force_alloc_skb))) {
+	if (((skb_len >= card->options.rx_sg_cb) &&
+	     (!(card->info.type == QETH_CARD_TYPE_OSN)) &&
+	     (!atomic_read(&card->force_alloc_skb))) ||
+	    (card->options.cq == QETH_CQ_ENABLED)) {
 		use_rx_sg = 1;
 	} else {
 		skb = dev_alloc_skb(skb_len + headroom);
@@ -4660,8 +4810,8 @@ struct sk_buff *qeth_core_get_next_skb(s
 		data_len = min(skb_len, (int)(element->length - offset));
 		if (data_len) {
 			if (use_rx_sg) {
-				if (qeth_create_skb_frag(element, &skb, offset,
-				    &frag, data_len))
+				if (qeth_create_skb_frag(qethbuffer, element,
+				    &skb, offset, &frag, data_len))
 					goto no_mem;
 			} else {
 				memcpy(skb_put(skb, data_len), data_ptr,
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -409,7 +409,7 @@ static int qeth_l2_process_inbound_buffe
 	BUG_ON(!budget);
 	while (budget) {
 		skb = qeth_core_get_next_skb(card,
-			card->qdio.in_q->bufs[card->rx.b_index].buffer,
+			&card->qdio.in_q->bufs[card->rx.b_index],
 			&card->rx.b_element, &card->rx.e_offset, &hdr);
 		if (!skb) {
 			*done = 1;
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -63,5 +63,9 @@ int qeth_l3_add_rxip(struct qeth_card *,
 void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions,
 			const u8 *);
 int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *);
+struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
+int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
+int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);
+void qeth_l3_set_ip_addr_list(struct qeth_card *);
 
 #endif /* __QETH_L3_H__ */
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -29,6 +29,7 @@
 #include <net/ip.h>
 #include <net/arp.h>
 #include <net/ip6_checksum.h>
+#include <net/iucv/af_iucv.h>
 
 #include "qeth_l3.h"
 
@@ -267,7 +268,7 @@ static int __qeth_l3_insert_ip_todo(stru
 	}
 }
 
-static int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *addr)
+int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *addr)
 {
 	unsigned long flags;
 	int rc = 0;
@@ -286,7 +287,7 @@ static int qeth_l3_delete_ip(struct qeth
 	return rc;
 }
 
-static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *addr)
+int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *addr)
 {
 	unsigned long flags;
 	int rc = 0;
@@ -305,7 +306,7 @@ static int qeth_l3_add_ip(struct qeth_ca
 }
 
 
-static struct qeth_ipaddr *qeth_l3_get_addr_buffer(
+struct qeth_ipaddr *qeth_l3_get_addr_buffer(
 				enum qeth_prot_versions prot)
 {
 	struct qeth_ipaddr *addr;
@@ -421,7 +422,7 @@ again:
 	list_splice(&fail_list, &card->ip_list);
 }
 
-static void qeth_l3_set_ip_addr_list(struct qeth_card *card)
+void qeth_l3_set_ip_addr_list(struct qeth_card *card)
 {
 	struct list_head *tbd_list;
 	struct qeth_ipaddr *todo, *addr;
@@ -438,7 +439,7 @@ static void qeth_l3_set_ip_addr_list(str
 
 	spin_lock_irqsave(&card->ip_lock, flags);
 	tbd_list = card->ip_tbd_list;
-	card->ip_tbd_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
+	card->ip_tbd_list = kzalloc(sizeof(struct list_head), GFP_ATOMIC);
 	if (!card->ip_tbd_list) {
 		QETH_CARD_TEXT(card, 0, "silnomem");
 		card->ip_tbd_list = tbd_list;
@@ -1993,12 +1994,13 @@ static int qeth_l3_process_inbound_buffe
 	__u16 vlan_tag = 0;
 	int is_vlan;
 	unsigned int len;
+	__u16 magic;
 
 	*done = 0;
 	BUG_ON(!budget);
 	while (budget) {
 		skb = qeth_core_get_next_skb(card,
-			card->qdio.in_q->bufs[card->rx.b_index].buffer,
+			&card->qdio.in_q->bufs[card->rx.b_index],
 			&card->rx.b_element, &card->rx.e_offset, &hdr);
 		if (!skb) {
 			*done = 1;
@@ -2007,12 +2009,26 @@ static int qeth_l3_process_inbound_buffe
 		skb->dev = card->dev;
 		switch (hdr->hdr.l3.id) {
 		case QETH_HEADER_TYPE_LAYER3:
-			is_vlan = qeth_l3_rebuild_skb(card, skb, hdr,
+			magic = *(__u16 *)skb->data;
+			if ((card->info.type == QETH_CARD_TYPE_IQD) &&
+			    (magic == ETH_P_AF_IUCV)) {
+				skb->protocol = ETH_P_AF_IUCV;
+				skb->pkt_type = PACKET_HOST;
+				skb->mac_header = NET_SKB_PAD;
+				skb->dev = card->dev;
+				len = skb->len;
+				card->dev->header_ops->create(skb, card->dev, 0,
+					card->dev->dev_addr, "FAKELL",
+					card->dev->addr_len);
+				netif_receive_skb(skb);
+			} else {
+				is_vlan = qeth_l3_rebuild_skb(card, skb, hdr,
 						      &vlan_tag);
-			len = skb->len;
-			if (is_vlan && !card->options.sniffer)
-				__vlan_hwaccel_put_tag(skb, vlan_tag);
-			napi_gro_receive(&card->napi, skb);
+				len = skb->len;
+				if (is_vlan && !card->options.sniffer)
+					__vlan_hwaccel_put_tag(skb, vlan_tag);
+				napi_gro_receive(&card->napi, skb);
+			}
 			break;
 		case QETH_HEADER_TYPE_LAYER2: /* for HiperSockets sniffer */
 			skb->pkt_type = PACKET_HOST;
@@ -2784,6 +2800,30 @@ int inline qeth_l3_get_cast_type(struct
 	return cast_type;
 }
 
+static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card,
+		struct qeth_hdr *hdr, struct sk_buff *skb)
+{
+	char daddr[16];
+	struct af_iucv_trans_hdr *iucv_hdr;
+
+	skb_pull(skb, 14);
+	card->dev->header_ops->create(skb, card->dev, 0,
+				      card->dev->dev_addr, card->dev->dev_addr,
+				      card->dev->addr_len);
+	skb_pull(skb, 14);
+	iucv_hdr = (struct af_iucv_trans_hdr *)skb->data;
+	memset(hdr, 0, sizeof(struct qeth_hdr));
+	hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3;
+	hdr->hdr.l3.ext_flags = 0;
+	hdr->hdr.l3.length = skb->len;
+	hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST;
+	memset(daddr, 0, sizeof(daddr));
+	daddr[0] = 0xfe;
+	daddr[1] = 0x80;
+	memcpy(&daddr[8], iucv_hdr->destUserID, 8);
+	memcpy(hdr->hdr.l3.dest_addr, daddr, 16);
+}
+
 static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 		struct sk_buff *skb, int ipv, int cast_type)
 {
@@ -2936,8 +2976,11 @@ static int qeth_l3_hard_start_xmit(struc
 	int data_offset = -1;
 	int nr_frags;
 
-	if (((card->info.type == QETH_CARD_TYPE_IQD) && (!ipv)) ||
-	     card->options.sniffer)
+	if (((card->info.type == QETH_CARD_TYPE_IQD) &&
+	     (((card->options.cq != QETH_CQ_ENABLED) && !ipv) ||
+	      ((card->options.cq == QETH_CQ_ENABLED) &&
+	       (skb->protocol != ETH_P_AF_IUCV)))) ||
+	    card->options.sniffer)
 			goto tx_drop;
 
 	if ((card->state != CARD_STATE_UP) || !card->lan_online) {
@@ -2959,7 +3002,10 @@ static int qeth_l3_hard_start_xmit(struc
 	if ((card->info.type == QETH_CARD_TYPE_IQD) && (!large_send) &&
 	    (skb_shinfo(skb)->nr_frags == 0)) {
 		new_skb = skb;
-		data_offset = ETH_HLEN;
+		if (new_skb->protocol == ETH_P_AF_IUCV)
+			data_offset = 0;
+		else
+			data_offset = ETH_HLEN;
 		hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
 		if (!hdr)
 			goto tx_drop;
@@ -3025,9 +3071,13 @@ static int qeth_l3_hard_start_xmit(struc
 			qeth_l3_fill_header(card, hdr, new_skb, ipv,
 						cast_type);
 		} else {
-			qeth_l3_fill_header(card, hdr, new_skb, ipv,
-						cast_type);
-			hdr->hdr.l3.length = new_skb->len - data_offset;
+			if (new_skb->protocol == ETH_P_AF_IUCV)
+				qeth_l3_fill_af_iucv_hdr(card, hdr, new_skb);
+			else {
+				qeth_l3_fill_header(card, hdr, new_skb, ipv,
+							cast_type);
+				hdr->hdr.l3.length = new_skb->len - data_offset;
+			}
 		}
 
 		if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -3290,6 +3340,8 @@ static int qeth_l3_setup_netdev(struct q
 		card->dev->flags |= IFF_NOARP;
 		card->dev->netdev_ops = &qeth_l3_netdev_ops;
 		qeth_l3_iqd_read_initial_mac(card);
+		if (card->options.hsuid[0])
+			memcpy(card->dev->perm_addr, card->options.hsuid, 9);
 	} else
 		return -ENODEV;
 
@@ -3660,7 +3712,6 @@ static int qeth_l3_ip6_event(struct noti
 	struct qeth_ipaddr *addr;
 	struct qeth_card *card;
 
-
 	card = qeth_l3_get_card_from_dev(dev);
 	if (!card)
 		return NOTIFY_DONE;
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/slab.h>
-
+#include <asm/ebcdic.h>
 #include "qeth_l3.h"
 
 #define QETH_DEVICE_ATTR(_id, _name, _mode, _show, _store) \
@@ -308,6 +308,8 @@ static ssize_t qeth_l3_dev_sniffer_store
 
 	if (card->info.type != QETH_CARD_TYPE_IQD)
 		return -EPERM;
+	if (card->options.cq == QETH_CQ_ENABLED)
+		return -EPERM;
 
 	mutex_lock(&card->conf_mutex);
 	if ((card->state != CARD_STATE_DOWN) &&
@@ -347,6 +349,111 @@ out:
 static DEVICE_ATTR(sniffer, 0644, qeth_l3_dev_sniffer_show,
 		qeth_l3_dev_sniffer_store);
 
+
+static ssize_t qeth_l3_dev_hsuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct qeth_card *card = dev_get_drvdata(dev);
+	char tmp_hsuid[9];
+
+	if (!card)
+		return -EINVAL;
+
+	if (card->info.type != QETH_CARD_TYPE_IQD)
+		return -EPERM;
+
+	if (card->state == CARD_STATE_DOWN)
+		return -EPERM;
+
+	memcpy(tmp_hsuid, card->options.hsuid, sizeof(tmp_hsuid));
+	EBCASC(tmp_hsuid, 8);
+	return sprintf(buf, "%s\n", tmp_hsuid);
+}
+
+static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct qeth_card *card = dev_get_drvdata(dev);
+	struct qeth_ipaddr *addr;
+	char *tmp;
+	int i;
+
+	if (!card)
+		return -EINVAL;
+
+	if (card->info.type != QETH_CARD_TYPE_IQD)
+		return -EPERM;
+	if (card->state != CARD_STATE_DOWN &&
+	    card->state != CARD_STATE_RECOVER)
+		return -EPERM;
+	if (card->options.sniffer)
+		return -EPERM;
+	if (card->options.cq == QETH_CQ_NOTAVAILABLE)
+		return -EPERM;
+
+	tmp = strsep((char **)&buf, "\n");
+	if (strlen(tmp) > 8)
+		return -EINVAL;
+
+	if (card->options.hsuid[0]) {
+		/* delete old ip address */
+		addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
+		if (addr != NULL) {
+			addr->u.a6.addr.s6_addr32[0] = 0xfe800000;
+			addr->u.a6.addr.s6_addr32[1] = 0x00000000;
+			for (i = 8; i < 16; i++)
+				addr->u.a6.addr.s6_addr[i] =
+					card->options.hsuid[i - 8];
+			addr->u.a6.pfxlen = 0;
+			addr->type = QETH_IP_TYPE_NORMAL;
+		} else
+			return -ENOMEM;
+		if (!qeth_l3_delete_ip(card, addr))
+			kfree(addr);
+		qeth_l3_set_ip_addr_list(card);
+	}
+
+	if (strlen(tmp) == 0) {
+		/* delete ip address only */
+		card->options.hsuid[0] = '\0';
+		if (card->dev)
+			memcpy(card->dev->perm_addr, card->options.hsuid, 9);
+		qeth_configure_cq(card, QETH_CQ_DISABLED);
+		return count;
+	}
+
+	if (qeth_configure_cq(card, QETH_CQ_ENABLED))
+		return -EPERM;
+
+	for (i = 0; i < 8; i++)
+		card->options.hsuid[i] = ' ';
+	card->options.hsuid[8] = '\0';
+	strncpy(card->options.hsuid, tmp, strlen(tmp));
+	ASCEBC(card->options.hsuid, 8);
+	if (card->dev)
+		memcpy(card->dev->perm_addr, card->options.hsuid, 9);
+
+	addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
+	if (addr != NULL) {
+		addr->u.a6.addr.s6_addr32[0] = 0xfe800000;
+		addr->u.a6.addr.s6_addr32[1] = 0x00000000;
+		for (i = 8; i < 16; i++)
+			addr->u.a6.addr.s6_addr[i] = card->options.hsuid[i - 8];
+		addr->u.a6.pfxlen = 0;
+		addr->type = QETH_IP_TYPE_NORMAL;
+	} else
+		return -ENOMEM;
+	if (!qeth_l3_add_ip(card, addr))
+		kfree(addr);
+	qeth_l3_set_ip_addr_list(card);
+
+	return count;
+}
+
+static DEVICE_ATTR(hsuid, 0644, qeth_l3_dev_hsuid_show,
+		   qeth_l3_dev_hsuid_store);
+
+
 static struct attribute *qeth_l3_device_attrs[] = {
 	&dev_attr_route4.attr,
 	&dev_attr_route6.attr,
@@ -354,6 +461,7 @@ static struct attribute *qeth_l3_device_
 	&dev_attr_broadcast_mode.attr,
 	&dev_attr_canonical_macaddr.attr,
 	&dev_attr_sniffer.attr,
+	&dev_attr_hsuid.attr,
 	NULL,
 };
 


^ permalink raw reply

* [patch 06/11] [PATCH] af_iucv: add HiperSockets transport
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Ursula Braun
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-af_iucv-add-HiperSockets-transport.patch --]
[-- Type: text/plain, Size: 29293 bytes --]

From: Ursula Braun <ursula.braun@de.ibm.com>

The current transport mechanism for af_iucv is the z/VM offered
communications facility IUCV. To provide equivalent support when
running Linux in an LPAR, HiperSockets transport is added to the
AF_IUCV address family. It requires explicit binding of an AF_IUCV
socket to a HiperSockets device. A new packet_type ETH_P_AF_IUCV
is announced. An af_iucv specific transport header is defined
preceding the skb data. A small protocol is implemented for
connecting and for flow control/congestion management.

Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
---
 include/net/iucv/af_iucv.h |   52 +++
 net/iucv/af_iucv.c         |  743 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 724 insertions(+), 71 deletions(-)

--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -14,6 +14,7 @@
 #include <linux/list.h>
 #include <linux/poll.h>
 #include <linux/socket.h>
+#include <net/iucv/iucv.h>
 
 #ifndef AF_IUCV
 #define AF_IUCV		32
@@ -33,6 +34,7 @@ enum {
 };
 
 #define IUCV_QUEUELEN_DEFAULT	65535
+#define IUCV_HIPER_MSGLIM_DEFAULT	128
 #define IUCV_CONN_TIMEOUT	(HZ * 40)
 #define IUCV_DISCONN_TIMEOUT	(HZ * 2)
 #define IUCV_CONN_IDLE_TIMEOUT	(HZ * 60)
@@ -57,8 +59,51 @@ struct sock_msg_q {
 	spinlock_t		lock;
 };
 
+#define AF_IUCV_FLAG_ACK 0x1
+#define AF_IUCV_FLAG_SYN 0x2
+#define AF_IUCV_FLAG_FIN 0x4
+#define AF_IUCV_FLAG_WIN 0x8
+
+struct af_iucv_trans_hdr {
+	u16 magic;
+	u8 version;
+	u8 flags;
+	u16 window;
+	char destNodeID[8];
+	char destUserID[8];
+	char destAppName[16];
+	char srcNodeID[8];
+	char srcUserID[8];
+	char srcAppName[16];             /* => 70 bytes */
+	struct iucv_message iucv_hdr;    /* => 33 bytes */
+	u8 pad;                          /* total 104 bytes */
+} __packed;
+
+enum iucv_tx_notify {
+	/* transmission of skb is completed and was successful */
+	TX_NOTIFY_OK = 0,
+	/* target is unreachable */
+	TX_NOTIFY_UNREACHABLE = 1,
+	/* transfer pending queue full */
+	TX_NOTIFY_TPQFULL = 2,
+	/* general error */
+	TX_NOTIFY_GENERALERROR = 3,
+	/* transmission of skb is pending - may interleave
+	 * with TX_NOTIFY_DELAYED_* */
+	TX_NOTIFY_PENDING = 4,
+	/* transmission of skb was done successfully (delayed) */
+	TX_NOTIFY_DELAYED_OK = 5,
+	/* target unreachable (detected delayed) */
+	TX_NOTIFY_DELAYED_UNREACHABLE = 6,
+	/* general error (detected delayed) */
+	TX_NOTIFY_DELAYED_GENERALERROR = 7,
+};
+
 #define iucv_sk(__sk) ((struct iucv_sock *) __sk)
 
+#define AF_IUCV_TRANS_IUCV 0
+#define AF_IUCV_TRANS_HIPER 1
+
 struct iucv_sock {
 	struct sock		sk;
 	char			src_user_id[8];
@@ -75,6 +120,13 @@ struct iucv_sock {
 	unsigned int		send_tag;
 	u8			flags;
 	u16			msglimit;
+	u16			msglimit_peer;
+	atomic_t		msg_sent;
+	atomic_t		msg_recv;
+	atomic_t		pendings;
+	int			transport;
+	void                    (*sk_txnotify)(struct sk_buff *skb,
+					       enum iucv_tx_notify n);
 };
 
 /* iucv socket options (SOL_IUCV) */
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -27,7 +27,6 @@
 #include <asm/cpcmd.h>
 #include <linux/kmod.h>
 
-#include <net/iucv/iucv.h>
 #include <net/iucv/af_iucv.h>
 
 #define VERSION "1.1"
@@ -92,6 +91,12 @@ do {									\
 static void iucv_sock_kill(struct sock *sk);
 static void iucv_sock_close(struct sock *sk);
 
+static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
+	struct packet_type *pt, struct net_device *orig_dev);
+static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
+		   struct sk_buff *skb, u8 flags);
+static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify);
+
 /* Call Back functions */
 static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
 static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
@@ -296,7 +301,11 @@ static inline int iucv_below_msglim(stru
 
 	if (sk->sk_state != IUCV_CONNECTED)
 		return 1;
-	return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
+	if (iucv->transport == AF_IUCV_TRANS_IUCV)
+		return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
+	else
+		return ((atomic_read(&iucv->msg_sent) < iucv->msglimit_peer) &&
+			(atomic_read(&iucv->pendings) <= 0));
 }
 
 /**
@@ -314,6 +323,79 @@ static void iucv_sock_wake_msglim(struct
 	rcu_read_unlock();
 }
 
+/**
+ * afiucv_hs_send() - send a message through HiperSockets transport
+ */
+static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
+		   struct sk_buff *skb, u8 flags)
+{
+	struct net *net = sock_net(sock);
+	struct iucv_sock *iucv = iucv_sk(sock);
+	struct af_iucv_trans_hdr *phs_hdr;
+	struct sk_buff *nskb;
+	int err, confirm_recv = 0;
+
+	memset(skb->head, 0, ETH_HLEN);
+	phs_hdr = (struct af_iucv_trans_hdr *)skb_push(skb,
+					sizeof(struct af_iucv_trans_hdr));
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_push(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+	memset(phs_hdr, 0, sizeof(struct af_iucv_trans_hdr));
+
+	phs_hdr->magic = ETH_P_AF_IUCV;
+	phs_hdr->version = 1;
+	phs_hdr->flags = flags;
+	if (flags == AF_IUCV_FLAG_SYN)
+		phs_hdr->window = iucv->msglimit;
+	else if ((flags == AF_IUCV_FLAG_WIN) || !flags) {
+		confirm_recv = atomic_read(&iucv->msg_recv);
+		phs_hdr->window = confirm_recv;
+		if (confirm_recv)
+			phs_hdr->flags = phs_hdr->flags | AF_IUCV_FLAG_WIN;
+	}
+	memcpy(phs_hdr->destUserID, iucv->dst_user_id, 8);
+	memcpy(phs_hdr->destAppName, iucv->dst_name, 8);
+	memcpy(phs_hdr->srcUserID, iucv->src_user_id, 8);
+	memcpy(phs_hdr->srcAppName, iucv->src_name, 8);
+	ASCEBC(phs_hdr->destUserID, sizeof(phs_hdr->destUserID));
+	ASCEBC(phs_hdr->destAppName, sizeof(phs_hdr->destAppName));
+	ASCEBC(phs_hdr->srcUserID, sizeof(phs_hdr->srcUserID));
+	ASCEBC(phs_hdr->srcAppName, sizeof(phs_hdr->srcAppName));
+	if (imsg)
+		memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message));
+
+	rcu_read_lock();
+	skb->dev = dev_get_by_index_rcu(net, sock->sk_bound_dev_if);
+	rcu_read_unlock();
+	if (!skb->dev)
+		return -ENODEV;
+	if (!(skb->dev->flags & IFF_UP))
+		return -ENETDOWN;
+	if (skb->len > skb->dev->mtu) {
+		if (sock->sk_type == SOCK_SEQPACKET)
+			return -EMSGSIZE;
+		else
+			skb_trim(skb, skb->dev->mtu);
+	}
+	skb->protocol = ETH_P_AF_IUCV;
+	skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return -ENOMEM;
+	skb_queue_tail(&iucv->send_skb_q, nskb);
+	err = dev_queue_xmit(skb);
+	if (err) {
+		skb_unlink(nskb, &iucv->send_skb_q);
+		kfree_skb(nskb);
+	} else {
+		atomic_sub(confirm_recv, &iucv->msg_recv);
+		WARN_ON(atomic_read(&iucv->msg_recv) < 0);
+	}
+	return err;
+}
+
 /* Timers */
 static void iucv_sock_timeout(unsigned long arg)
 {
@@ -382,6 +464,8 @@ static void iucv_sock_close(struct sock 
 	unsigned char user_data[16];
 	struct iucv_sock *iucv = iucv_sk(sk);
 	unsigned long timeo;
+	int err, blen;
+	struct sk_buff *skb;
 
 	iucv_sock_clear_timer(sk);
 	lock_sock(sk);
@@ -392,6 +476,20 @@ static void iucv_sock_close(struct sock 
 		break;
 
 	case IUCV_CONNECTED:
+		if (iucv->transport == AF_IUCV_TRANS_HIPER) {
+			/* send fin */
+			blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+			skb = sock_alloc_send_skb(sk, blen, 1, &err);
+			if (skb) {
+				skb_reserve(skb,
+					sizeof(struct af_iucv_trans_hdr) +
+					ETH_HLEN);
+				err = afiucv_hs_send(NULL, sk, skb,
+						     AF_IUCV_FLAG_FIN);
+			}
+			sk->sk_state = IUCV_DISCONN;
+			sk->sk_state_change(sk);
+		}
 	case IUCV_DISCONN:
 		sk->sk_state = IUCV_CLOSING;
 		sk->sk_state_change(sk);
@@ -461,9 +559,13 @@ static struct sock *iucv_sock_alloc(stru
 	spin_lock_init(&iucv->message_q.lock);
 	skb_queue_head_init(&iucv->backlog_skb_q);
 	iucv->send_tag = 0;
+	atomic_set(&iucv->pendings, 0);
 	iucv->flags = 0;
-	iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
+	iucv->msglimit = 0;
+	atomic_set(&iucv->msg_sent, 0);
+	atomic_set(&iucv->msg_recv, 0);
 	iucv->path = NULL;
+	iucv->sk_txnotify = afiucv_hs_callback_txnotify;
 	memset(&iucv->src_user_id , 0, 32);
 
 	sk->sk_destruct = iucv_sock_destruct;
@@ -595,7 +697,9 @@ static int iucv_sock_bind(struct socket 
 	struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
 	struct sock *sk = sock->sk;
 	struct iucv_sock *iucv;
-	int err;
+	int err = 0;
+	struct net_device *dev;
+	char uid[9];
 
 	/* Verify the input sockaddr */
 	if (!addr || addr->sa_family != AF_IUCV)
@@ -614,19 +718,46 @@ static int iucv_sock_bind(struct socket 
 		err = -EADDRINUSE;
 		goto done_unlock;
 	}
-	if (iucv->path) {
-		err = 0;
+	if (iucv->path)
 		goto done_unlock;
-	}
 
 	/* Bind the socket */
-	memcpy(iucv->src_name, sa->siucv_name, 8);
 
-	/* Copy the user id */
-	memcpy(iucv->src_user_id, iucv_userid, 8);
-	sk->sk_state = IUCV_BOUND;
-	err = 0;
+	if (pr_iucv)
+		if (!memcmp(sa->siucv_user_id, iucv_userid, 8))
+			goto vm_bind; /* VM IUCV transport */
 
+	/* try hiper transport */
+	memcpy(uid, sa->siucv_user_id, sizeof(uid));
+	ASCEBC(uid, 8);
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
+		if (!memcmp(dev->perm_addr, uid, 8)) {
+			memcpy(iucv->src_name, sa->siucv_name, 8);
+			memcpy(iucv->src_user_id, sa->siucv_user_id, 8);
+			sock->sk->sk_bound_dev_if = dev->ifindex;
+			sk->sk_state = IUCV_BOUND;
+			iucv->transport = AF_IUCV_TRANS_HIPER;
+			if (!iucv->msglimit)
+				iucv->msglimit = IUCV_HIPER_MSGLIM_DEFAULT;
+			rcu_read_unlock();
+			goto done_unlock;
+		}
+	}
+	rcu_read_unlock();
+vm_bind:
+	if (pr_iucv) {
+		/* use local userid for backward compat */
+		memcpy(iucv->src_name, sa->siucv_name, 8);
+		memcpy(iucv->src_user_id, iucv_userid, 8);
+		sk->sk_state = IUCV_BOUND;
+		iucv->transport = AF_IUCV_TRANS_IUCV;
+		if (!iucv->msglimit)
+			iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
+		goto done_unlock;
+	}
+	/* found no dev to bind */
+	err = -ENODEV;
 done_unlock:
 	/* Release the socket list lock */
 	write_unlock_bh(&iucv_sk_list.lock);
@@ -662,12 +793,33 @@ static int iucv_sock_autobind(struct soc
 
 	memcpy(&iucv->src_name, name, 8);
 
+	if (!iucv->msglimit)
+		iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
+
 	return err;
 }
 
-/* Connect an unconnected socket */
-static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
-			     int alen, int flags)
+static int afiucv_hs_connect(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+	int err = 0;
+
+	/* send syn */
+	skb = sock_alloc_send_skb(sk, blen, 1, &err);
+	if (!skb) {
+		err = -ENOMEM;
+		goto done;
+	}
+	skb->dev = NULL;
+	skb_reserve(skb, blen);
+	err = afiucv_hs_send(NULL, sk, skb, AF_IUCV_FLAG_SYN);
+done:
+	return err;
+}
+
+static int afiucv_path_connect(struct socket *sock, struct sockaddr *addr)
 {
 	struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
 	struct sock *sk = sock->sk;
@@ -675,27 +827,6 @@ static int iucv_sock_connect(struct sock
 	unsigned char user_data[16];
 	int err;
 
-	if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv))
-		return -EINVAL;
-
-	if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND)
-		return -EBADFD;
-
-	if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET)
-		return -EINVAL;
-
-	if (sk->sk_state == IUCV_OPEN) {
-		err = iucv_sock_autobind(sk);
-		if (unlikely(err))
-			return err;
-	}
-
-	lock_sock(sk);
-
-	/* Set the destination information */
-	memcpy(iucv->dst_user_id, sa->siucv_user_id, 8);
-	memcpy(iucv->dst_name, sa->siucv_name, 8);
-
 	high_nmcpy(user_data, sa->siucv_name);
 	low_nmcpy(user_data, iucv->src_name);
 	ASCEBC(user_data, sizeof(user_data));
@@ -728,20 +859,61 @@ static int iucv_sock_connect(struct sock
 			err = -ECONNREFUSED;
 			break;
 		}
-		goto done;
+	}
+done:
+	return err;
+}
+
+/* Connect an unconnected socket */
+static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
+			     int alen, int flags)
+{
+	struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
+	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
+	int err;
+
+	if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv))
+		return -EINVAL;
+
+	if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND)
+		return -EBADFD;
+
+	if (sk->sk_state == IUCV_OPEN &&
+	    iucv->transport == AF_IUCV_TRANS_HIPER)
+		return -EBADFD; /* explicit bind required */
+
+	if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET)
+		return -EINVAL;
+
+	if (sk->sk_state == IUCV_OPEN) {
+		err = iucv_sock_autobind(sk);
+		if (unlikely(err))
+			return err;
 	}
 
-	if (sk->sk_state != IUCV_CONNECTED) {
+	lock_sock(sk);
+
+	/* Set the destination information */
+	memcpy(iucv->dst_user_id, sa->siucv_user_id, 8);
+	memcpy(iucv->dst_name, sa->siucv_name, 8);
+
+	if (iucv->transport == AF_IUCV_TRANS_HIPER)
+		err = afiucv_hs_connect(sock);
+	else
+		err = afiucv_path_connect(sock, addr);
+	if (err)
+		goto done;
+
+	if (sk->sk_state != IUCV_CONNECTED)
 		err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED,
 							    IUCV_DISCONN),
 				     sock_sndtimeo(sk, flags & O_NONBLOCK));
-	}
 
-	if (sk->sk_state == IUCV_DISCONN) {
+	if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_CLOSED)
 		err = -ECONNREFUSED;
-	}
 
-	if (err) {
+	if (err && iucv->transport == AF_IUCV_TRANS_IUCV) {
 		pr_iucv->path_sever(iucv->path, NULL);
 		iucv_path_free(iucv->path);
 		iucv->path = NULL;
@@ -965,9 +1137,16 @@ static int iucv_sock_sendmsg(struct kioc
 	 * this is fine for SOCK_SEQPACKET (unless we want to support
 	 * segmented records using the MSG_EOR flag), but
 	 * for SOCK_STREAM we might want to improve it in future */
-	skb = sock_alloc_send_skb(sk, len, noblock, &err);
+	if (iucv->transport == AF_IUCV_TRANS_HIPER)
+		skb = sock_alloc_send_skb(sk,
+			len + sizeof(struct af_iucv_trans_hdr) + ETH_HLEN,
+			noblock, &err);
+	else
+		skb = sock_alloc_send_skb(sk, len, noblock, &err);
 	if (!skb)
 		goto out;
+	if (iucv->transport == AF_IUCV_TRANS_HIPER)
+		skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN);
 	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
 		err = -EFAULT;
 		goto fail;
@@ -988,6 +1167,15 @@ static int iucv_sock_sendmsg(struct kioc
 	/* increment and save iucv message tag for msg_completion cbk */
 	txmsg.tag = iucv->send_tag++;
 	memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN);
+	if (iucv->transport == AF_IUCV_TRANS_HIPER) {
+		atomic_inc(&iucv->msg_sent);
+		err = afiucv_hs_send(&txmsg, sk, skb, 0);
+		if (err) {
+			atomic_dec(&iucv->msg_sent);
+			goto fail;
+		}
+		goto release;
+	}
 	skb_queue_tail(&iucv->send_skb_q, skb);
 
 	if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
@@ -1028,6 +1216,7 @@ static int iucv_sock_sendmsg(struct kioc
 		goto fail;
 	}
 
+release:
 	release_sock(sk);
 	return len;
 
@@ -1160,7 +1349,8 @@ static int iucv_sock_recvmsg(struct kioc
 	struct sock *sk = sock->sk;
 	struct iucv_sock *iucv = iucv_sk(sk);
 	unsigned int copied, rlen;
-	struct sk_buff *skb, *rskb, *cskb;
+	struct sk_buff *skb, *rskb, *cskb, *sskb;
+	int blen;
 	int err = 0;
 
 	if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) &&
@@ -1185,7 +1375,7 @@ static int iucv_sock_recvmsg(struct kioc
 	copied = min_t(unsigned int, rlen, len);
 
 	cskb = skb;
-	if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) {
+	if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) {
 		if (!(flags & MSG_PEEK))
 			skb_queue_head(&sk->sk_receive_queue, skb);
 		return -EFAULT;
@@ -1223,6 +1413,7 @@ static int iucv_sock_recvmsg(struct kioc
 		}
 
 		kfree_skb(skb);
+		atomic_inc(&iucv->msg_recv);
 
 		/* Queue backlog skbs */
 		spin_lock_bh(&iucv->message_q.lock);
@@ -1239,6 +1430,24 @@ static int iucv_sock_recvmsg(struct kioc
 		if (skb_queue_empty(&iucv->backlog_skb_q)) {
 			if (!list_empty(&iucv->message_q.list))
 				iucv_process_message_q(sk);
+			if (atomic_read(&iucv->msg_recv) >=
+							iucv->msglimit / 2) {
+				/* send WIN to peer */
+				blen = sizeof(struct af_iucv_trans_hdr) +
+					ETH_HLEN;
+				sskb = sock_alloc_send_skb(sk, blen, 1, &err);
+				if (sskb) {
+					skb_reserve(sskb,
+						sizeof(struct af_iucv_trans_hdr)
+						+ ETH_HLEN);
+					err = afiucv_hs_send(NULL, sk, sskb,
+							     AF_IUCV_FLAG_WIN);
+				}
+				if (err) {
+					sk->sk_state = IUCV_DISCONN;
+					sk->sk_state_change(sk);
+				}
+			}
 		}
 		spin_unlock_bh(&iucv->message_q.lock);
 	}
@@ -1698,6 +1907,389 @@ static void iucv_callback_shutdown(struc
 	bh_unlock_sock(sk);
 }
 
+/***************** HiperSockets transport callbacks ********************/
+static void afiucv_swap_src_dest(struct sk_buff *skb)
+{
+	struct af_iucv_trans_hdr *trans_hdr =
+				(struct af_iucv_trans_hdr *)skb->data;
+	char tmpID[8];
+	char tmpName[8];
+
+	ASCEBC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID));
+	ASCEBC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName));
+	ASCEBC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID));
+	ASCEBC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName));
+	memcpy(tmpID, trans_hdr->srcUserID, 8);
+	memcpy(tmpName, trans_hdr->srcAppName, 8);
+	memcpy(trans_hdr->srcUserID, trans_hdr->destUserID, 8);
+	memcpy(trans_hdr->srcAppName, trans_hdr->destAppName, 8);
+	memcpy(trans_hdr->destUserID, tmpID, 8);
+	memcpy(trans_hdr->destAppName, tmpName, 8);
+	skb_push(skb, ETH_HLEN);
+	memset(skb->data, 0, ETH_HLEN);
+}
+
+/**
+ * afiucv_hs_callback_syn - react on received SYN
+ **/
+static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
+{
+	struct sock *nsk;
+	struct iucv_sock *iucv, *niucv;
+	struct af_iucv_trans_hdr *trans_hdr;
+	int err;
+
+	iucv = iucv_sk(sk);
+	trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
+	if (!iucv) {
+		/* no sock - connection refused */
+		afiucv_swap_src_dest(skb);
+		trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN;
+		err = dev_queue_xmit(skb);
+		goto out;
+	}
+
+	nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
+	bh_lock_sock(sk);
+	if ((sk->sk_state != IUCV_LISTEN) ||
+	    sk_acceptq_is_full(sk) ||
+	    !nsk) {
+		/* error on server socket - connection refused */
+		if (nsk)
+			sk_free(nsk);
+		afiucv_swap_src_dest(skb);
+		trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN;
+		err = dev_queue_xmit(skb);
+		bh_unlock_sock(sk);
+		goto out;
+	}
+
+	niucv = iucv_sk(nsk);
+	iucv_sock_init(nsk, sk);
+	niucv->transport = AF_IUCV_TRANS_HIPER;
+	niucv->msglimit = iucv->msglimit;
+	if (!trans_hdr->window)
+		niucv->msglimit_peer = IUCV_HIPER_MSGLIM_DEFAULT;
+	else
+		niucv->msglimit_peer = trans_hdr->window;
+	memcpy(niucv->dst_name, trans_hdr->srcAppName, 8);
+	memcpy(niucv->dst_user_id, trans_hdr->srcUserID, 8);
+	memcpy(niucv->src_name, iucv->src_name, 8);
+	memcpy(niucv->src_user_id, iucv->src_user_id, 8);
+	nsk->sk_bound_dev_if = sk->sk_bound_dev_if;
+	afiucv_swap_src_dest(skb);
+	trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK;
+	trans_hdr->window = niucv->msglimit;
+	/* if receiver acks the xmit connection is established */
+	err = dev_queue_xmit(skb);
+	if (!err) {
+		iucv_accept_enqueue(sk, nsk);
+		nsk->sk_state = IUCV_CONNECTED;
+		sk->sk_data_ready(sk, 1);
+	} else
+		iucv_sock_kill(nsk);
+	bh_unlock_sock(sk);
+
+out:
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_synack() - react on received SYN-ACK
+ **/
+static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
+{
+	struct iucv_sock *iucv = iucv_sk(sk);
+	struct af_iucv_trans_hdr *trans_hdr =
+					(struct af_iucv_trans_hdr *)skb->data;
+
+	if (!iucv)
+		goto out;
+	if (sk->sk_state != IUCV_BOUND)
+		goto out;
+	bh_lock_sock(sk);
+	iucv->msglimit_peer = trans_hdr->window;
+	sk->sk_state = IUCV_CONNECTED;
+	sk->sk_state_change(sk);
+	bh_unlock_sock(sk);
+out:
+	kfree_skb(skb);
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_synfin() - react on received SYN_FIN
+ **/
+static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
+{
+	struct iucv_sock *iucv = iucv_sk(sk);
+
+	if (!iucv)
+		goto out;
+	if (sk->sk_state != IUCV_BOUND)
+		goto out;
+	bh_lock_sock(sk);
+	sk->sk_state = IUCV_DISCONN;
+	sk->sk_state_change(sk);
+	bh_unlock_sock(sk);
+out:
+	kfree_skb(skb);
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_fin() - react on received FIN
+ **/
+static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
+{
+	struct iucv_sock *iucv = iucv_sk(sk);
+
+	/* other end of connection closed */
+	if (iucv) {
+		bh_lock_sock(sk);
+		if (!list_empty(&iucv->accept_q))
+			sk->sk_state = IUCV_SEVERED;
+		else
+			sk->sk_state = IUCV_DISCONN;
+		sk->sk_state_change(sk);
+		bh_unlock_sock(sk);
+	}
+	kfree_skb(skb);
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_win() - react on received WIN
+ **/
+static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb)
+{
+	struct iucv_sock *iucv = iucv_sk(sk);
+	struct af_iucv_trans_hdr *trans_hdr =
+					(struct af_iucv_trans_hdr *)skb->data;
+
+	if (!iucv)
+		return NET_RX_SUCCESS;
+
+	if (sk->sk_state != IUCV_CONNECTED)
+		return NET_RX_SUCCESS;
+
+	atomic_sub(trans_hdr->window, &iucv->msg_sent);
+	iucv_sock_wake_msglim(sk);
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_rx() - react on received data
+ **/
+static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
+{
+	struct iucv_sock *iucv = iucv_sk(sk);
+
+	if (!iucv) {
+		kfree_skb(skb);
+		return NET_RX_SUCCESS;
+	}
+
+	if (sk->sk_state != IUCV_CONNECTED) {
+		kfree_skb(skb);
+		return NET_RX_SUCCESS;
+	}
+
+		/* write stuff from iucv_msg to skb cb */
+	if (skb->len <= sizeof(struct af_iucv_trans_hdr)) {
+		kfree_skb(skb);
+		return NET_RX_SUCCESS;
+	}
+	skb_pull(skb, sizeof(struct af_iucv_trans_hdr));
+	skb_reset_transport_header(skb);
+	skb_reset_network_header(skb);
+	spin_lock(&iucv->message_q.lock);
+	if (skb_queue_empty(&iucv->backlog_skb_q)) {
+		if (sock_queue_rcv_skb(sk, skb)) {
+			/* handle rcv queue full */
+			skb_queue_tail(&iucv->backlog_skb_q, skb);
+		}
+	} else
+		skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
+	spin_unlock(&iucv->message_q.lock);
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_rcv() - base function for arriving data through HiperSockets
+ *                   transport
+ *                   called from netif RX softirq
+ **/
+static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
+	struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct hlist_node *node;
+	struct sock *sk;
+	struct iucv_sock *iucv;
+	struct af_iucv_trans_hdr *trans_hdr;
+	char nullstring[8];
+	int err = 0;
+
+	skb_pull(skb, ETH_HLEN);
+	trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
+	EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName));
+	EBCASC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID));
+	EBCASC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName));
+	EBCASC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID));
+	memset(nullstring, 0, sizeof(nullstring));
+	iucv = NULL;
+	sk = NULL;
+	read_lock(&iucv_sk_list.lock);
+	sk_for_each(sk, node, &iucv_sk_list.head) {
+		if (trans_hdr->flags == AF_IUCV_FLAG_SYN) {
+			if ((!memcmp(&iucv_sk(sk)->src_name,
+				     trans_hdr->destAppName, 8)) &&
+			    (!memcmp(&iucv_sk(sk)->src_user_id,
+				     trans_hdr->destUserID, 8)) &&
+			    (!memcmp(&iucv_sk(sk)->dst_name, nullstring, 8)) &&
+			    (!memcmp(&iucv_sk(sk)->dst_user_id,
+				     nullstring, 8))) {
+				iucv = iucv_sk(sk);
+				break;
+			}
+		} else {
+			if ((!memcmp(&iucv_sk(sk)->src_name,
+				     trans_hdr->destAppName, 8)) &&
+			    (!memcmp(&iucv_sk(sk)->src_user_id,
+				     trans_hdr->destUserID, 8)) &&
+			    (!memcmp(&iucv_sk(sk)->dst_name,
+				     trans_hdr->srcAppName, 8)) &&
+			    (!memcmp(&iucv_sk(sk)->dst_user_id,
+				     trans_hdr->srcUserID, 8))) {
+				iucv = iucv_sk(sk);
+				break;
+			}
+		}
+	}
+	read_unlock(&iucv_sk_list.lock);
+	if (!iucv)
+		sk = NULL;
+
+	/* no sock
+	how should we send with no sock
+	1) send without sock no send rc checking?
+	2) introduce default sock to handle this cases
+
+	 SYN -> send SYN|ACK in good case, send SYN|FIN in bad case
+	 data -> send FIN
+	 SYN|ACK, SYN|FIN, FIN -> no action? */
+
+	switch (trans_hdr->flags) {
+	case AF_IUCV_FLAG_SYN:
+		/* connect request */
+		err = afiucv_hs_callback_syn(sk, skb);
+		break;
+	case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK):
+		/* connect request confirmed */
+		err = afiucv_hs_callback_synack(sk, skb);
+		break;
+	case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN):
+		/* connect request refused */
+		err = afiucv_hs_callback_synfin(sk, skb);
+		break;
+	case (AF_IUCV_FLAG_FIN):
+		/* close request */
+		err = afiucv_hs_callback_fin(sk, skb);
+		break;
+	case (AF_IUCV_FLAG_WIN):
+		err = afiucv_hs_callback_win(sk, skb);
+		if (skb->len > sizeof(struct af_iucv_trans_hdr))
+			err = afiucv_hs_callback_rx(sk, skb);
+		else
+			kfree(skb);
+		break;
+	case 0:
+		/* plain data frame */
+		err = afiucv_hs_callback_rx(sk, skb);
+		break;
+	default:
+		;
+	}
+
+	return err;
+}
+
+/**
+ * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets
+ *                                 transport
+ **/
+static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
+					enum iucv_tx_notify n)
+{
+	struct sock *isk = skb->sk;
+	struct sock *sk = NULL;
+	struct iucv_sock *iucv = NULL;
+	struct sk_buff_head *list;
+	struct sk_buff *list_skb;
+	struct sk_buff *this = NULL;
+	unsigned long flags;
+	struct hlist_node *node;
+
+	read_lock(&iucv_sk_list.lock);
+	sk_for_each(sk, node, &iucv_sk_list.head)
+		if (sk == isk) {
+			iucv = iucv_sk(sk);
+			break;
+		}
+	read_unlock(&iucv_sk_list.lock);
+
+	if (!iucv)
+		return;
+
+	bh_lock_sock(sk);
+	list = &iucv->send_skb_q;
+	list_skb = list->next;
+	if (skb_queue_empty(list))
+		goto out_unlock;
+
+	spin_lock_irqsave(&list->lock, flags);
+	while (list_skb != (struct sk_buff *)list) {
+		if (skb_shinfo(list_skb) == skb_shinfo(skb)) {
+			this = list_skb;
+			switch (n) {
+			case TX_NOTIFY_OK:
+				__skb_unlink(this, list);
+				iucv_sock_wake_msglim(sk);
+				kfree_skb(this);
+				break;
+			case TX_NOTIFY_PENDING:
+				atomic_inc(&iucv->pendings);
+				break;
+			case TX_NOTIFY_DELAYED_OK:
+				__skb_unlink(this, list);
+				atomic_dec(&iucv->pendings);
+				if (atomic_read(&iucv->pendings) <= 0)
+					iucv_sock_wake_msglim(sk);
+				kfree_skb(this);
+				break;
+			case TX_NOTIFY_UNREACHABLE:
+			case TX_NOTIFY_DELAYED_UNREACHABLE:
+			case TX_NOTIFY_TPQFULL: /* not yet used */
+			case TX_NOTIFY_GENERALERROR:
+			case TX_NOTIFY_DELAYED_GENERALERROR:
+				__skb_unlink(this, list);
+				kfree_skb(this);
+				if (!list_empty(&iucv->accept_q))
+					sk->sk_state = IUCV_SEVERED;
+				else
+					sk->sk_state = IUCV_DISCONN;
+				sk->sk_state_change(sk);
+				break;
+			}
+			break;
+		}
+		list_skb = list_skb->next;
+	}
+	spin_unlock_irqrestore(&list->lock, flags);
+
+out_unlock:
+	bh_unlock_sock(sk);
+}
 static const struct proto_ops iucv_sock_ops = {
 	.family		= PF_IUCV,
 	.owner		= THIS_MODULE,
@@ -1724,7 +2316,12 @@ static const struct net_proto_family iuc
 	.create	= iucv_sock_create,
 };
 
-static int __init afiucv_iucv_init(void)
+static struct packet_type iucv_packet_type = {
+	.type = cpu_to_be16(ETH_P_AF_IUCV),
+	.func = afiucv_hs_rcv,
+};
+
+static int afiucv_iucv_init(void)
 {
 	int err;
 
@@ -1763,24 +2360,22 @@ static int __init afiucv_init(void)
 {
 	int err;
 
-	if (!MACHINE_IS_VM) {
-		pr_err("The af_iucv module cannot be loaded"
-		       " without z/VM\n");
-		err = -EPROTONOSUPPORT;
-		goto out;
-	}
-	cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
-	if (unlikely(err)) {
-		WARN_ON(err);
-		err = -EPROTONOSUPPORT;
-		goto out;
-	}
+	if (MACHINE_IS_VM) {
+		cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
+		if (unlikely(err)) {
+			WARN_ON(err);
+			err = -EPROTONOSUPPORT;
+			goto out;
+		}
 
-	pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
-	if (!pr_iucv) {
-		printk(KERN_WARNING "iucv_if lookup failed\n");
-		err = -EPROTONOSUPPORT;
-		goto out;
+		pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
+		if (!pr_iucv) {
+			printk(KERN_WARNING "iucv_if lookup failed\n");
+			memset(&iucv_userid, 0, sizeof(iucv_userid));
+		}
+	} else {
+		memset(&iucv_userid, 0, sizeof(iucv_userid));
+		pr_iucv = NULL;
 	}
 
 	err = proto_register(&iucv_proto, 0);
@@ -1790,10 +2385,12 @@ static int __init afiucv_init(void)
 	if (err)
 		goto out_proto;
 
-	err = afiucv_iucv_init();
-	if (err)
-		goto out_sock;
-
+	if (pr_iucv) {
+		err = afiucv_iucv_init();
+		if (err)
+			goto out_sock;
+	}
+	dev_add_pack(&iucv_packet_type);
 	return 0;
 
 out_sock:
@@ -1808,10 +2405,13 @@ out:
 
 static void __exit afiucv_exit(void)
 {
-	device_unregister(af_iucv_dev);
-	driver_unregister(&af_iucv_driver);
-	pr_iucv->iucv_unregister(&af_iucv_handler, 0);
-	symbol_put(iucv_if);
+	if (pr_iucv) {
+		device_unregister(af_iucv_dev);
+		driver_unregister(&af_iucv_driver);
+		pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+		symbol_put(iucv_if);
+	}
+	dev_remove_pack(&iucv_packet_type);
 	sock_unregister(PF_IUCV);
 	proto_unregister(&iucv_proto);
 }
@@ -1824,3 +2424,4 @@ MODULE_DESCRIPTION("IUCV Sockets ver " V
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(PF_IUCV);
+


^ permalink raw reply

* [patch 07/11] [PATCH] qdio: support asynchronous delivery of storage blocks
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Einar Lueck, Jan Glauber
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117_qdio_cq.patch --]
[-- Type: text/plain, Size: 29747 bytes --]

From: Einar Lueck <elelueck@de.ibm.com>

This patch introduces support for asynchronous delivery of storage blocks for
Hipersockets. Upper layers may exploit this functionality to reuse SBALs for
which the delivery status is still pending.

Signed-off-by: Einar Lueck <elelueck@de.ibm.com>
Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 arch/s390/include/asm/qdio.h      |   68 ++++++++++++
 drivers/s390/cio/qdio.h           |   29 +++++
 drivers/s390/cio/qdio_debug.c     |    3 
 drivers/s390/cio/qdio_main.c      |  203 +++++++++++++++++++++++++++++++++-----
 drivers/s390/cio/qdio_setup.c     |   83 ++++++++++++++-
 drivers/s390/cio/qdio_thinint.c   |   88 +++++++++-------
 drivers/s390/net/qeth_core_main.c |   30 ++++-
 7 files changed, 424 insertions(+), 80 deletions(-)

--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -123,6 +123,40 @@ struct slibe {
 };
 
 /**
+ * struct qaob - queue asynchronous operation block
+ * @res0: reserved parameters
+ * @res1: reserved parameter
+ * @res2: reserved parameter
+ * @res3: reserved parameter
+ * @aorc: asynchronous operation return code
+ * @flags: internal flags
+ * @cbtbs: control block type
+ * @sb_count: number of storage blocks
+ * @sba: storage block element addresses
+ * @dcount: size of storage block elements
+ * @user0: user defineable value
+ * @res4: reserved paramater
+ * @user1: user defineable value
+ * @user2: user defineable value
+ */
+struct qaob {
+	u64 res0[6];
+	u8 res1;
+	u8 res2;
+	u8 res3;
+	u8 aorc;
+	u8 flags;
+	u16 cbtbs;
+	u8 sb_count;
+	u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	u64 user0;
+	u64 res4[2];
+	u64 user1;
+	u64 user2;
+} __attribute__ ((packed, aligned(256)));
+
+/**
  * struct slib - storage list information block (SLIB)
  * @nsliba: next SLIB address (if any)
  * @sla: SL address
@@ -225,6 +259,31 @@ struct slsb {
 #define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
 #define CHSC_AC2_DATA_DIV_ENABLED	0x0002
 
+/**
+ * struct qdio_outbuf_state - SBAL related asynchronous operation information
+ *   (for communication with upper layer programs)
+ *   (only required for use with completion queues)
+ * @flags: flags indicating state of buffer
+ * @aob: pointer to QAOB used for the particular SBAL
+ * @user: pointer to upper layer program's state information related to SBAL
+ *        (stored in user1 data of QAOB)
+ */
+struct qdio_outbuf_state {
+	u8 flags;
+	struct qaob *aob;
+	void *user;
+};
+
+#define QDIO_OUTBUF_STATE_FLAG_NONE	0x00
+#define QDIO_OUTBUF_STATE_FLAG_PENDING	0x01
+
+#define CHSC_AC1_INITIATE_INPUTQ	0x80
+
+#define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
+#define CHSC_AC2_DATA_DIV_ENABLED	0x0002
+
+#define CHSC_AC3_FORMAT2_CQ_AVAILABLE	0x8000
+
 struct qdio_ssqd_desc {
 	u8 flags;
 	u8:8;
@@ -243,8 +302,7 @@ struct qdio_ssqd_desc {
 	u64 sch_token;
 	u8 mro;
 	u8 mri;
-	u8:8;
-	u8 sbalic;
+	u16 qdioac3;
 	u16:16;
 	u8:8;
 	u8 mmwc;
@@ -280,9 +338,11 @@ typedef void qdio_handler_t(struct ccw_d
  * @no_output_qs: number of output queues
  * @input_handler: handler to be called for input queues
  * @output_handler: handler to be called for output queues
+ * @queue_start_poll: polling handlers (one per input queue or NULL)
  * @int_parm: interruption parameter
  * @input_sbal_addr_array:  address of no_input_qs * 128 pointers
  * @output_sbal_addr_array: address of no_output_qs * 128 pointers
+ * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL)
  */
 struct qdio_initialize {
 	struct ccw_device *cdev;
@@ -297,11 +357,12 @@ struct qdio_initialize {
 	unsigned int no_output_qs;
 	qdio_handler_t *input_handler;
 	qdio_handler_t *output_handler;
-	void (*queue_start_poll) (struct ccw_device *, int, unsigned long);
+	void (**queue_start_poll) (struct ccw_device *, int, unsigned long);
 	int scan_threshold;
 	unsigned long int_parm;
 	void **input_sbal_addr_array;
 	void **output_sbal_addr_array;
+	struct qdio_outbuf_state *output_sbal_state_array;
 };
 
 #define QDIO_STATE_INACTIVE		0x00000002 /* after qdio_cleanup */
@@ -316,6 +377,7 @@ struct qdio_initialize {
 extern int qdio_allocate(struct qdio_initialize *);
 extern int qdio_establish(struct qdio_initialize *);
 extern int qdio_activate(struct ccw_device *);
+extern void qdio_release_aob(struct qaob *);
 extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int,
 		   unsigned int);
 extern int qdio_start_irq(struct ccw_device *, int);
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -42,6 +42,7 @@ enum qdio_irq_states {
 #define SLSB_STATE_NOT_INIT	0x0
 #define SLSB_STATE_EMPTY	0x1
 #define SLSB_STATE_PRIMED	0x2
+#define SLSB_STATE_PENDING	0x3
 #define SLSB_STATE_HALTED	0xe
 #define SLSB_STATE_ERROR	0xf
 #define SLSB_TYPE_INPUT		0x0
@@ -65,6 +66,8 @@ enum qdio_irq_states {
 	(SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_NOT_INIT) /* 0xa0 */
 #define SLSB_P_OUTPUT_EMPTY	\
 	(SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_EMPTY)	   /* 0xa1 */
+#define SLSB_P_OUTPUT_PENDING \
+	(SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_PENDING)  /* 0xa3 */
 #define SLSB_CU_OUTPUT_PRIMED	\
 	(SLSB_OWNER_CU | SLSB_TYPE_OUTPUT | SLSB_STATE_PRIMED)	   /* 0x62 */
 #define SLSB_P_OUTPUT_HALTED	\
@@ -95,6 +98,7 @@ enum qdio_irq_states {
 #define QDIO_SIGA_WRITE		0x00
 #define QDIO_SIGA_READ		0x01
 #define QDIO_SIGA_SYNC		0x02
+#define QDIO_SIGA_WRITEQ	0x04
 #define QDIO_SIGA_QEBSM_FLAG	0x80
 
 #ifdef CONFIG_64BIT
@@ -251,6 +255,12 @@ struct qdio_input_q {
 struct qdio_output_q {
 	/* PCIs are enabled for the queue */
 	int pci_out_enabled;
+	/* cq: use asynchronous output buffers */
+	int use_cq;
+	/* cq: aobs used for particual SBAL */
+	struct qaob **aobs;
+	/* cq: sbal state related to asynchronous operation */
+	struct qdio_outbuf_state *sbal_state;
 	/* timer to check for more outbound work */
 	struct timer_list timer;
 	/* used SBALs before tasklet schedule */
@@ -430,9 +440,20 @@ struct indicator_t {
 
 extern struct indicator_t *q_indicators;
 
-static inline int shared_ind(u32 *dsci)
+static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq)
 {
-	return dsci == &q_indicators[TIQDIO_SHARED_IND].ind;
+	return irq->nr_input_qs > 1;
+}
+
+static inline int references_shared_dsci(struct qdio_irq *irq)
+{
+	return irq->dsci == &q_indicators[TIQDIO_SHARED_IND].ind;
+}
+
+static inline int shared_ind(struct qdio_q *q)
+{
+	struct qdio_irq *i = q->irq_ptr;
+	return references_shared_dsci(i) || has_multiple_inq_on_dsci(i);
 }
 
 /* prototypes for thin interrupt */
@@ -447,6 +468,7 @@ void tiqdio_free_memory(void);
 int tiqdio_register_thinints(void);
 void tiqdio_unregister_thinints(void);
 
+
 /* prototypes for setup */
 void qdio_inbound_processing(unsigned long data);
 void qdio_outbound_processing(unsigned long data);
@@ -467,6 +489,9 @@ int qdio_setup_create_sysfs(struct ccw_d
 void qdio_setup_destroy_sysfs(struct ccw_device *cdev);
 int qdio_setup_init(void);
 void qdio_setup_exit(void);
+int qdio_enable_async_operation(struct qdio_output_q *q);
+void qdio_disable_async_operation(struct qdio_output_q *q);
+struct qaob *qdio_allocate_aob(void);
 
 int debug_get_buf_state(struct qdio_q *q, unsigned int bufnr,
 			unsigned char *state);
--- a/drivers/s390/cio/qdio_debug.c
+++ b/drivers/s390/cio/qdio_debug.c
@@ -76,6 +76,9 @@ static int qstat_show(struct seq_file *m
 		case SLSB_P_OUTPUT_NOT_INIT:
 			seq_printf(m, "N");
 			break;
+		case SLSB_P_OUTPUT_PENDING:
+			seq_printf(m, "P");
+			break;
 		case SLSB_P_INPUT_PRIMED:
 		case SLSB_CU_OUTPUT_PRIMED:
 			seq_printf(m, "+");
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -14,6 +14,7 @@
 #include <linux/timer.h>
 #include <linux/delay.h>
 #include <linux/gfp.h>
+#include <linux/io.h>
 #include <linux/kernel_stat.h>
 #include <asm/atomic.h>
 #include <asm/debug.h>
@@ -77,11 +78,13 @@ static inline int do_siga_input(unsigned
  * Note: For IQDC unicast queues only the highest priority queue is processed.
  */
 static inline int do_siga_output(unsigned long schid, unsigned long mask,
-				 unsigned int *bb, unsigned int fc)
+				 unsigned int *bb, unsigned int fc,
+				 unsigned long aob)
 {
 	register unsigned long __fc asm("0") = fc;
 	register unsigned long __schid asm("1") = schid;
 	register unsigned long __mask asm("2") = mask;
+	register unsigned long __aob asm("3") = aob;
 	int cc = QDIO_ERROR_SIGA_ACCESS_EXCEPTION;
 
 	asm volatile(
@@ -90,7 +93,8 @@ static inline int do_siga_output(unsigne
 		"	srl	%0,28\n"
 		"1:\n"
 		EX_TABLE(0b, 1b)
-		: "+d" (cc), "+d" (__fc), "+d" (__schid), "+d" (__mask)
+		: "+d" (cc), "+d" (__fc), "+d" (__schid), "+d" (__mask),
+		  "+d" (__aob)
 		: : "cc", "memory");
 	*bb = ((unsigned int) __fc) >> 31;
 	return cc;
@@ -212,7 +216,7 @@ again:
 /* returns number of examined buffers and their common state in *state */
 static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr,
 				 unsigned char *state, unsigned int count,
-				 int auto_ack)
+				 int auto_ack, int merge_pending)
 {
 	unsigned char __state = 0;
 	int i;
@@ -224,9 +228,14 @@ static inline int get_buf_states(struct
 		return qdio_do_eqbs(q, state, bufnr, count, auto_ack);
 
 	for (i = 0; i < count; i++) {
-		if (!__state)
+		if (!__state) {
 			__state = q->slsb.val[bufnr];
-		else if (q->slsb.val[bufnr] != __state)
+			if (merge_pending && __state == SLSB_P_OUTPUT_PENDING)
+				__state = SLSB_P_OUTPUT_EMPTY;
+		} else if (merge_pending) {
+			if ((q->slsb.val[bufnr] & __state) != __state)
+				break;
+		} else if (q->slsb.val[bufnr] != __state)
 			break;
 		bufnr = next_buf(bufnr);
 	}
@@ -237,7 +246,7 @@ static inline int get_buf_states(struct
 static inline int get_buf_state(struct qdio_q *q, unsigned int bufnr,
 				unsigned char *state, int auto_ack)
 {
-	return get_buf_states(q, bufnr, state, 1, auto_ack);
+	return get_buf_states(q, bufnr, state, 1, auto_ack, 0);
 }
 
 /* wrap-around safe setting of slsb states, returns number of changed buffers */
@@ -308,19 +317,28 @@ static inline int qdio_siga_sync_q(struc
 		return qdio_siga_sync(q, q->mask, 0);
 }
 
-static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit)
+static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit,
+	unsigned long aob)
 {
 	unsigned long schid = *((u32 *) &q->irq_ptr->schid);
 	unsigned int fc = QDIO_SIGA_WRITE;
 	u64 start_time = 0;
 	int cc;
+	unsigned long laob = 0;
+
+	if (q->u.out.use_cq && aob != 0) {
+		fc = QDIO_SIGA_WRITEQ;
+		laob = aob;
+	}
 
 	if (is_qebsm(q)) {
 		schid = q->irq_ptr->sch_token;
 		fc |= QDIO_SIGA_QEBSM_FLAG;
 	}
 again:
-	cc = do_siga_output(schid, q->mask, busy_bit, fc);
+	WARN_ON_ONCE((aob && queue_type(q) != QDIO_IQDIO_QFMT) ||
+		(aob && fc != QDIO_SIGA_WRITEQ));
+	cc = do_siga_output(schid, q->mask, busy_bit, fc, laob);
 
 	/* hipersocket busy condition */
 	if (unlikely(*busy_bit)) {
@@ -373,7 +391,7 @@ int debug_get_buf_state(struct qdio_q *q
 {
 	if (need_siga_sync(q))
 		qdio_siga_sync_q(q);
-	return get_buf_states(q, bufnr, state, 1, 0);
+	return get_buf_states(q, bufnr, state, 1, 0, 0);
 }
 
 static inline void qdio_stop_polling(struct qdio_q *q)
@@ -501,7 +519,7 @@ static int get_inbound_buffer_frontier(s
 	 * No siga sync here, as a PCI or we after a thin interrupt
 	 * already sync'ed the queues.
 	 */
-	count = get_buf_states(q, q->first_to_check, &state, count, 1);
+	count = get_buf_states(q, q->first_to_check, &state, count, 1, 0);
 	if (!count)
 		goto out;
 
@@ -584,6 +602,107 @@ static inline int qdio_inbound_q_done(st
 		return 0;
 }
 
+static inline int contains_aobs(struct qdio_q *q)
+{
+	return !q->is_input_q && q->u.out.use_cq;
+}
+
+static inline void qdio_trace_aob(struct qdio_irq *irq, struct qdio_q *q,
+				int i, struct qaob *aob)
+{
+	int tmp;
+
+	DBF_DEV_EVENT(DBF_INFO, irq, "AOB%d:%lx", i,
+			(unsigned long) virt_to_phys(aob));
+	DBF_DEV_EVENT(DBF_INFO, irq, "RES00:%lx",
+			(unsigned long) aob->res0[0]);
+	DBF_DEV_EVENT(DBF_INFO, irq, "RES01:%lx",
+			(unsigned long) aob->res0[1]);
+	DBF_DEV_EVENT(DBF_INFO, irq, "RES02:%lx",
+			(unsigned long) aob->res0[2]);
+	DBF_DEV_EVENT(DBF_INFO, irq, "RES03:%lx",
+			(unsigned long) aob->res0[3]);
+	DBF_DEV_EVENT(DBF_INFO, irq, "RES04:%lx",
+			(unsigned long) aob->res0[4]);
+	DBF_DEV_EVENT(DBF_INFO, irq, "RES05:%lx",
+			(unsigned long) aob->res0[5]);
+	DBF_DEV_EVENT(DBF_INFO, irq, "RES1:%x", aob->res1);
+	DBF_DEV_EVENT(DBF_INFO, irq, "RES2:%x", aob->res2);
+	DBF_DEV_EVENT(DBF_INFO, irq, "RES3:%x", aob->res3);
+	DBF_DEV_EVENT(DBF_INFO, irq, "AORC:%u", aob->aorc);
+	DBF_DEV_EVENT(DBF_INFO, irq, "FLAGS:%u", aob->flags);
+	DBF_DEV_EVENT(DBF_INFO, irq, "CBTBS:%u", aob->cbtbs);
+	DBF_DEV_EVENT(DBF_INFO, irq, "SBC:%u", aob->sb_count);
+	for (tmp = 0; tmp < QDIO_MAX_ELEMENTS_PER_BUFFER; ++tmp) {
+		DBF_DEV_EVENT(DBF_INFO, irq, "SBA%d:%lx", tmp,
+				(unsigned long) aob->sba[tmp]);
+		DBF_DEV_EVENT(DBF_INFO, irq, "rSBA%d:%lx", tmp,
+				(unsigned long) q->sbal[i]->element[tmp].addr);
+		DBF_DEV_EVENT(DBF_INFO, irq, "DC%d:%u", tmp, aob->dcount[tmp]);
+		DBF_DEV_EVENT(DBF_INFO, irq, "rDC%d:%u", tmp,
+				q->sbal[i]->element[tmp].length);
+	}
+	DBF_DEV_EVENT(DBF_INFO, irq, "USER0:%lx", (unsigned long) aob->user0);
+	for (tmp = 0; tmp < 2; ++tmp) {
+		DBF_DEV_EVENT(DBF_INFO, irq, "RES4%d:%lx", tmp,
+			(unsigned long) aob->res4[tmp]);
+	}
+	DBF_DEV_EVENT(DBF_INFO, irq, "USER1:%lx", (unsigned long) aob->user1);
+	DBF_DEV_EVENT(DBF_INFO, irq, "USER2:%lx", (unsigned long) aob->user2);
+}
+
+static inline void qdio_handle_aobs(struct qdio_q *q, int start, int count)
+{
+	unsigned char state = 0;
+	int j, b = start;
+
+	if (!contains_aobs(q))
+		return;
+
+	for (j = 0; j < count; ++j) {
+		get_buf_state(q, b, &state, 0);
+		if (state == SLSB_P_OUTPUT_PENDING) {
+			struct qaob *aob = q->u.out.aobs[b];
+			if (aob == NULL)
+				continue;
+
+			BUG_ON(q->u.out.sbal_state == NULL);
+			q->u.out.sbal_state[b].flags |=
+				QDIO_OUTBUF_STATE_FLAG_PENDING;
+			q->u.out.aobs[b] = NULL;
+		} else if (state == SLSB_P_OUTPUT_EMPTY) {
+			BUG_ON(q->u.out.sbal_state == NULL);
+			q->u.out.sbal_state[b].aob = NULL;
+		}
+		b = next_buf(b);
+	}
+}
+
+static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q,
+					int bufnr)
+{
+	unsigned long phys_aob = 0;
+
+	if (!q->use_cq)
+		goto out;
+
+	if (!q->aobs[bufnr]) {
+		struct qaob *aob = qdio_allocate_aob();
+		q->aobs[bufnr] = aob;
+	}
+	if (q->aobs[bufnr]) {
+		BUG_ON(q->sbal_state == NULL);
+		q->sbal_state[bufnr].flags = QDIO_OUTBUF_STATE_FLAG_NONE;
+		q->sbal_state[bufnr].aob = q->aobs[bufnr];
+		q->aobs[bufnr]->user1 = (u64) q->sbal_state[bufnr].user;
+		phys_aob = virt_to_phys(q->aobs[bufnr]);
+		BUG_ON(phys_aob & 0xFF);
+	}
+
+out:
+	return phys_aob;
+}
+
 static void qdio_kick_handler(struct qdio_q *q)
 {
 	int start = q->first_to_kick;
@@ -604,6 +723,8 @@ static void qdio_kick_handler(struct qdi
 			      start, count);
 	}
 
+	qdio_handle_aobs(q, start, count);
+
 	q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr, start, count,
 		   q->irq_ptr->int_parm);
 
@@ -666,23 +787,26 @@ static int get_outbound_buffer_frontier(
 	 */
 	count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK);
 	stop = add_buf(q->first_to_check, count);
-
 	if (q->first_to_check == stop)
-		return q->first_to_check;
+		goto out;
 
-	count = get_buf_states(q, q->first_to_check, &state, count, 0);
+	count = get_buf_states(q, q->first_to_check, &state, count, 0, 1);
 	if (!count)
-		return q->first_to_check;
+		goto out;
 
 	switch (state) {
+	case SLSB_P_OUTPUT_PENDING:
+		BUG();
 	case SLSB_P_OUTPUT_EMPTY:
 		/* the adapter got it */
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out empty:%1d %02x", q->nr, count);
+		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr,
+			"out empty:%1d %02x", q->nr, count);
 
 		atomic_sub(count, &q->nr_buf_used);
 		q->first_to_check = add_buf(q->first_to_check, count);
 		if (q->irq_ptr->perf_stat_enabled)
 			account_sbals(q, count);
+
 		break;
 	case SLSB_P_OUTPUT_ERROR:
 		process_buffer_error(q, count);
@@ -695,7 +819,8 @@ static int get_outbound_buffer_frontier(
 		/* the adapter has not fetched the output yet */
 		if (q->irq_ptr->perf_stat_enabled)
 			q->q_stats.nr_sbal_nop++;
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out primed:%1d", q->nr);
+		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out primed:%1d",
+			      q->nr);
 		break;
 	case SLSB_P_OUTPUT_NOT_INIT:
 	case SLSB_P_OUTPUT_HALTED:
@@ -703,6 +828,8 @@ static int get_outbound_buffer_frontier(
 	default:
 		BUG();
 	}
+
+out:
 	return q->first_to_check;
 }
 
@@ -726,7 +853,7 @@ static inline int qdio_outbound_q_moved(
 		return 0;
 }
 
-static int qdio_kick_outbound_q(struct qdio_q *q)
+static int qdio_kick_outbound_q(struct qdio_q *q, unsigned long aob)
 {
 	unsigned int busy_bit;
 	int cc;
@@ -737,7 +864,7 @@ static int qdio_kick_outbound_q(struct q
 	DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w:%1d", q->nr);
 	qperf_inc(q, siga_write);
 
-	cc = qdio_siga_output(q, &busy_bit);
+	cc = qdio_siga_output(q, &busy_bit, aob);
 	switch (cc) {
 	case 0:
 		break;
@@ -906,8 +1033,9 @@ static void qdio_int_handler_pci(struct
 			}
 			q->u.in.queue_start_poll(q->irq_ptr->cdev, q->nr,
 						 q->irq_ptr->int_parm);
-		} else
+		} else {
 			tasklet_schedule(&q->tasklet);
+		}
 	}
 
 	if (!pci_out_supported(q))
@@ -1221,6 +1349,26 @@ out_err:
 }
 EXPORT_SYMBOL_GPL(qdio_allocate);
 
+static void qdio_detect_hsicq(struct qdio_irq *irq_ptr)
+{
+	struct qdio_q *q = irq_ptr->input_qs[0];
+	int i, use_cq = 0;
+
+	if (irq_ptr->nr_input_qs > 1 && queue_type(q) == QDIO_IQDIO_QFMT)
+		use_cq = 1;
+
+	for_each_output_queue(irq_ptr, q, i) {
+		if (use_cq) {
+			if (qdio_enable_async_operation(&q->u.out) < 0) {
+				use_cq = 0;
+				continue;
+			}
+		} else
+			qdio_disable_async_operation(&q->u.out);
+	}
+	DBF_EVENT("use_cq:%d", use_cq);
+}
+
 /**
  * qdio_establish - establish queues on a qdio subchannel
  * @init_data: initialization data
@@ -1286,6 +1434,8 @@ int qdio_establish(struct qdio_initializ
 	qdio_setup_ssqd_info(irq_ptr);
 	DBF_EVENT("qib ac:%4x", irq_ptr->qib.ac);
 
+	qdio_detect_hsicq(irq_ptr);
+
 	/* qebsm is now setup if available, initialize buffer states */
 	qdio_init_buf_states(irq_ptr);
 
@@ -1465,17 +1615,21 @@ static int handle_outbound(struct qdio_q
 		q->u.out.pci_out_enabled = 0;
 
 	if (queue_type(q) == QDIO_IQDIO_QFMT) {
-		/* One SIGA-W per buffer required for unicast HiperSockets. */
+		unsigned long phys_aob = 0;
+
+		/* One SIGA-W per buffer required for unicast HSI */
 		WARN_ON_ONCE(count > 1 && !multicast_outbound(q));
 
-		rc = qdio_kick_outbound_q(q);
+		phys_aob = qdio_aob_for_buffer(&q->u.out, bufnr);
+
+		rc = qdio_kick_outbound_q(q, phys_aob);
 	} else if (need_siga_sync(q)) {
 		rc = qdio_siga_sync_q(q);
 	} else {
 		/* try to fast requeue buffers */
 		get_buf_state(q, prev_buf(bufnr), &state, 0);
 		if (state != SLSB_CU_OUTPUT_PRIMED)
-			rc = qdio_kick_outbound_q(q);
+			rc = qdio_kick_outbound_q(q, 0);
 		else
 			qperf_inc(q, fast_requeue);
 	}
@@ -1503,6 +1657,7 @@ int do_QDIO(struct ccw_device *cdev, uns
 {
 	struct qdio_irq *irq_ptr;
 
+
 	if (bufnr >= QDIO_MAX_BUFFERS_PER_Q || count > QDIO_MAX_BUFFERS_PER_Q)
 		return -EINVAL;
 
@@ -1547,7 +1702,7 @@ int qdio_start_irq(struct ccw_device *cd
 
 	WARN_ON(queue_irqs_enabled(q));
 
-	if (!shared_ind(q->irq_ptr->dsci))
+	if (!shared_ind(q))
 		xchg(q->irq_ptr->dsci, 0);
 
 	qdio_stop_polling(q);
@@ -1557,7 +1712,7 @@ int qdio_start_irq(struct ccw_device *cd
 	 * We need to check again to not lose initiative after
 	 * resetting the ACK state.
 	 */
-	if (!shared_ind(q->irq_ptr->dsci) && *q->irq_ptr->dsci)
+	if (!shared_ind(q) && *q->irq_ptr->dsci)
 		goto rescan;
 	if (!qdio_inbound_q_done(q))
 		goto rescan;
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -19,6 +19,22 @@
 #include "qdio_debug.h"
 
 static struct kmem_cache *qdio_q_cache;
+static struct kmem_cache *qdio_aob_cache;
+
+struct qaob *qdio_allocate_aob()
+{
+	struct qaob *aob;
+
+	aob = kmem_cache_zalloc(qdio_aob_cache, GFP_ATOMIC);
+	return aob;
+}
+EXPORT_SYMBOL_GPL(qdio_allocate_aob);
+
+void qdio_release_aob(struct qaob *aob)
+{
+	kmem_cache_free(qdio_aob_cache, aob);
+}
+EXPORT_SYMBOL_GPL(qdio_release_aob);
 
 /*
  * qebsm is only available under 64bit but the adapter sets the feature
@@ -154,29 +170,36 @@ static void setup_queues(struct qdio_irq
 	struct qdio_q *q;
 	void **input_sbal_array = qdio_init->input_sbal_addr_array;
 	void **output_sbal_array = qdio_init->output_sbal_addr_array;
+	struct qdio_outbuf_state *output_sbal_state_array =
+				  qdio_init->output_sbal_state_array;
 	int i;
 
 	for_each_input_queue(irq_ptr, q, i) {
-		DBF_EVENT("in-q:%1d", i);
+		DBF_EVENT("inq:%1d", i);
 		setup_queues_misc(q, irq_ptr, qdio_init->input_handler, i);
 
 		q->is_input_q = 1;
-		q->u.in.queue_start_poll = qdio_init->queue_start_poll;
+		q->u.in.queue_start_poll = qdio_init->queue_start_poll[i];
+
 		setup_storage_lists(q, irq_ptr, input_sbal_array, i);
 		input_sbal_array += QDIO_MAX_BUFFERS_PER_Q;
 
-		if (is_thinint_irq(irq_ptr))
+		if (is_thinint_irq(irq_ptr)) {
 			tasklet_init(&q->tasklet, tiqdio_inbound_processing,
 				     (unsigned long) q);
-		else
+		} else {
 			tasklet_init(&q->tasklet, qdio_inbound_processing,
 				     (unsigned long) q);
+		}
 	}
 
 	for_each_output_queue(irq_ptr, q, i) {
 		DBF_EVENT("outq:%1d", i);
 		setup_queues_misc(q, irq_ptr, qdio_init->output_handler, i);
 
+		q->u.out.sbal_state = output_sbal_state_array;
+		output_sbal_state_array += QDIO_MAX_BUFFERS_PER_Q;
+
 		q->is_input_q = 0;
 		q->u.out.scan_threshold = qdio_init->scan_threshold;
 		setup_storage_lists(q, irq_ptr, output_sbal_array, i);
@@ -311,6 +334,19 @@ void qdio_release_memory(struct qdio_irq
 	for (i = 0; i < QDIO_MAX_QUEUES_PER_IRQ; i++) {
 		q = irq_ptr->output_qs[i];
 		if (q) {
+			if (q->u.out.use_cq) {
+				int n;
+
+				for (n = 0; n < QDIO_MAX_BUFFERS_PER_Q; ++n) {
+					struct qaob *aob = q->u.out.aobs[n];
+					if (aob) {
+						qdio_release_aob(aob);
+						q->u.out.aobs[n] = NULL;
+					}
+				}
+
+				qdio_disable_async_operation(&q->u.out);
+			}
 			free_page((unsigned long) q->slib);
 			kmem_cache_free(qdio_q_cache, q);
 		}
@@ -465,23 +501,60 @@ void qdio_print_subchannel_info(struct q
 	printk(KERN_INFO "%s", s);
 }
 
+int qdio_enable_async_operation(struct qdio_output_q *outq)
+{
+	outq->aobs = kzalloc(sizeof(struct qaob *) * QDIO_MAX_BUFFERS_PER_Q,
+			     GFP_ATOMIC);
+	if (!outq->aobs) {
+		outq->use_cq = 0;
+		return -ENOMEM;
+	}
+	outq->use_cq = 1;
+	return 0;
+}
+
+void qdio_disable_async_operation(struct qdio_output_q *q)
+{
+	kfree(q->aobs);
+	q->aobs = NULL;
+	q->use_cq = 0;
+}
+
 int __init qdio_setup_init(void)
 {
+	int rc;
+
 	qdio_q_cache = kmem_cache_create("qdio_q", sizeof(struct qdio_q),
 					 256, 0, NULL);
 	if (!qdio_q_cache)
 		return -ENOMEM;
 
+	qdio_aob_cache = kmem_cache_create("qdio_aob",
+					sizeof(struct qaob),
+					sizeof(struct qaob),
+					0,
+					NULL);
+	if (!qdio_aob_cache) {
+		rc = -ENOMEM;
+		goto free_qdio_q_cache;
+	}
+
 	/* Check for OSA/FCP thin interrupts (bit 67). */
 	DBF_EVENT("thinint:%1d",
 		  (css_general_characteristics.aif_osa) ? 1 : 0);
 
 	/* Check for QEBSM support in general (bit 58). */
 	DBF_EVENT("cssQEBSM:%1d", (qebsm_possible()) ? 1 : 0);
-	return 0;
+	rc = 0;
+out:
+	return rc;
+free_qdio_q_cache:
+	kmem_cache_destroy(qdio_q_cache);
+	goto out;
 }
 
 void qdio_setup_exit(void)
 {
+	kmem_cache_destroy(qdio_aob_cache);
 	kmem_cache_destroy(qdio_q_cache);
 }
--- a/drivers/s390/cio/qdio_thinint.c
+++ b/drivers/s390/cio/qdio_thinint.c
@@ -67,12 +67,9 @@ static void put_indicator(u32 *addr)
 
 void tiqdio_add_input_queues(struct qdio_irq *irq_ptr)
 {
-	struct qdio_q *q;
-	int i;
-
 	mutex_lock(&tiq_list_lock);
-	for_each_input_queue(irq_ptr, q, i)
-		list_add_rcu(&q->entry, &tiq_list);
+	BUG_ON(irq_ptr->nr_input_qs < 1);
+	list_add_rcu(&irq_ptr->input_qs[0]->entry, &tiq_list);
 	mutex_unlock(&tiq_list_lock);
 	xchg(irq_ptr->dsci, 1 << 7);
 }
@@ -80,19 +77,17 @@ void tiqdio_add_input_queues(struct qdio
 void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
 {
 	struct qdio_q *q;
-	int i;
 
-	for (i = 0; i < irq_ptr->nr_input_qs; i++) {
-		q = irq_ptr->input_qs[i];
-		/* if establish triggered an error */
-		if (!q || !q->entry.prev || !q->entry.next)
-			continue;
+	BUG_ON(irq_ptr->nr_input_qs < 1);
+	q = irq_ptr->input_qs[0];
+	/* if establish triggered an error */
+	if (!q || !q->entry.prev || !q->entry.next)
+		return;
 
-		mutex_lock(&tiq_list_lock);
-		list_del_rcu(&q->entry);
-		mutex_unlock(&tiq_list_lock);
-		synchronize_rcu();
-	}
+	mutex_lock(&tiq_list_lock);
+	list_del_rcu(&q->entry);
+	mutex_unlock(&tiq_list_lock);
+	synchronize_rcu();
 }
 
 static inline u32 shared_ind_set(void)
@@ -100,6 +95,40 @@ static inline u32 shared_ind_set(void)
 	return q_indicators[TIQDIO_SHARED_IND].ind;
 }
 
+static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq)
+{
+	struct qdio_q *q;
+	int i;
+
+	for_each_input_queue(irq, q, i) {
+		if (!references_shared_dsci(irq) &&
+		    has_multiple_inq_on_dsci(irq))
+			xchg(q->irq_ptr->dsci, 0);
+
+		if (q->u.in.queue_start_poll) {
+			/* skip if polling is enabled or already in work */
+			if (test_and_set_bit(QDIO_QUEUE_IRQS_DISABLED,
+					     &q->u.in.queue_irq_state)) {
+				qperf_inc(q, int_discarded);
+				continue;
+			}
+
+			/* avoid dsci clear here, done after processing */
+			q->u.in.queue_start_poll(q->irq_ptr->cdev, q->nr,
+						 q->irq_ptr->int_parm);
+		} else {
+			if (!shared_ind(q))
+				xchg(q->irq_ptr->dsci, 0);
+
+			/*
+			 * Call inbound processing but not directly
+			 * since that could starve other thinint queues.
+			 */
+			tasklet_schedule(&q->tasklet);
+		}
+	}
+}
+
 /**
  * tiqdio_thinint_handler - thin interrupt handler for qdio
  * @alsi: pointer to adapter local summary indicator
@@ -118,35 +147,18 @@ static void tiqdio_thinint_handler(void
 
 	/* check for work on all inbound thinint queues */
 	list_for_each_entry_rcu(q, &tiq_list, entry) {
+		struct qdio_irq *irq;
 
 		/* only process queues from changed sets */
-		if (unlikely(shared_ind(q->irq_ptr->dsci))) {
+		irq = q->irq_ptr;
+		if (unlikely(references_shared_dsci(irq))) {
 			if (!si_used)
 				continue;
-		} else if (!*q->irq_ptr->dsci)
+		} else if (!*irq->dsci)
 			continue;
 
-		if (q->u.in.queue_start_poll) {
-			/* skip if polling is enabled or already in work */
-			if (test_and_set_bit(QDIO_QUEUE_IRQS_DISABLED,
-					     &q->u.in.queue_irq_state)) {
-				qperf_inc(q, int_discarded);
-				continue;
-			}
+		tiqdio_call_inq_handlers(irq);
 
-			/* avoid dsci clear here, done after processing */
-			q->u.in.queue_start_poll(q->irq_ptr->cdev, q->nr,
-						 q->irq_ptr->int_parm);
-		} else {
-			/* only clear it if the indicator is non-shared */
-			if (!shared_ind(q->irq_ptr->dsci))
-				xchg(q->irq_ptr->dsci, 0);
-			/*
-			 * Call inbound processing but not directly
-			 * since that could starve other thinint queues.
-			 */
-			tasklet_schedule(&q->tasklet);
-		}
 		qperf_inc(q, adapter_int);
 	}
 	rcu_read_unlock();
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3937,6 +3937,7 @@ static int qeth_qdio_establish(struct qe
 	struct qdio_initialize init_data;
 	char *qib_param_field;
 	struct qdio_buffer **in_sbal_ptrs;
+	void (**queue_start_poll) (struct ccw_device *, int, unsigned long);
 	struct qdio_buffer **out_sbal_ptrs;
 	int i, j, k;
 	int rc = 0;
@@ -3945,8 +3946,10 @@ static int qeth_qdio_establish(struct qe
 
 	qib_param_field = kzalloc(QDIO_MAX_BUFFERS_PER_Q * sizeof(char),
 			      GFP_KERNEL);
-	if (!qib_param_field)
-		return -ENOMEM;
+	if (!qib_param_field) {
+		rc =  -ENOMEM;
+		goto out_free_nothing;
+	}
 
 	qeth_create_qib_param_field(card, qib_param_field);
 	qeth_create_qib_param_field_blkt(card, qib_param_field);
@@ -3954,20 +3957,26 @@ static int qeth_qdio_establish(struct qe
 	in_sbal_ptrs = kmalloc(QDIO_MAX_BUFFERS_PER_Q * sizeof(void *),
 			       GFP_KERNEL);
 	if (!in_sbal_ptrs) {
-		kfree(qib_param_field);
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto out_free_qib_param;
 	}
 	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i)
 		in_sbal_ptrs[i] = (struct qdio_buffer *)
 			virt_to_phys(card->qdio.in_q->bufs[i].buffer);
 
+	queue_start_poll = kmalloc(sizeof(void *) * 1, GFP_KERNEL);
+	if (!queue_start_poll) {
+		rc = -ENOMEM;
+		goto out_free_in_sbals;
+	}
+	queue_start_poll[0] = card->discipline.start_poll;
+
 	out_sbal_ptrs =
 		kmalloc(card->qdio.no_out_queues * QDIO_MAX_BUFFERS_PER_Q *
 			sizeof(void *), GFP_KERNEL);
 	if (!out_sbal_ptrs) {
-		kfree(in_sbal_ptrs);
-		kfree(qib_param_field);
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto out_free_queue_start_poll;
 	}
 	for (i = 0, k = 0; i < card->qdio.no_out_queues; ++i)
 		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j, ++k) {
@@ -3984,7 +3993,7 @@ static int qeth_qdio_establish(struct qe
 	init_data.no_output_qs           = card->qdio.no_out_queues;
 	init_data.input_handler          = card->discipline.input_handler;
 	init_data.output_handler         = card->discipline.output_handler;
-	init_data.queue_start_poll	 = card->discipline.start_poll;
+	init_data.queue_start_poll       = queue_start_poll;
 	init_data.int_parm               = (unsigned long) card;
 	init_data.input_sbal_addr_array  = (void **) in_sbal_ptrs;
 	init_data.output_sbal_addr_array = (void **) out_sbal_ptrs;
@@ -4006,8 +4015,13 @@ static int qeth_qdio_establish(struct qe
 	}
 out:
 	kfree(out_sbal_ptrs);
+out_free_queue_start_poll:
+	kfree(queue_start_poll);
+out_free_in_sbals:
 	kfree(in_sbal_ptrs);
+out_free_qib_param:
 	kfree(qib_param_field);
+out_free_nothing:
 	return rc;
 }
 


^ permalink raw reply

* [patch 00/11] [RFC] s390: af_iucv traffic over HiperSockets transport
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390

Following patch set implements a new feature to use HiperSockets
as a transport mechanism for the af_iucv address family. Basic
design idea is: The current transport mechanism for af_iucv (iucv)
is only available on VM. HiperSockets provide similar capabilities
as iucv and are available on LPAR. There is guaranty for data
integrity, in order delivery and delivery acknowledgment. A new
HiperSockets feature is exploited to support
flow control/congestion management. For this it is nearby to extend
af_iucv address family to use HiperSockets transport.

shortlog:

Einar Lueck (4)
qdio: support asynchronous delivery of storage blocks
qdio: support forced signal adapter indications
qeth: exploit asynchronous delivery of storage blocks
qeth: support forced signal adapter indications

Ursula Braun (3)
iucv: kernel option for z/VM IUCV and HiperSockets
af_iucv: cleanup - use iucv_sk(sk) early
af_iucv: add HiperSockets transport

Frank Blaschka (4)
iucv: introduce loadable iucv interface
af_iucv: use loadable iucv interface
if_ether: add new Ethernet Protocol ID for af_iucv
qeth: add support for af_iucv HiperSockets transport

Thanks for review and comments,
        Frank

^ permalink raw reply

* [patch 02/11] [PATCH] iucv: kernel option for z/VM IUCV and HiperSockets
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Ursula Braun
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-af_iucv-kernel-option-for-z-VM-IUCV-and-HiperSockets.patch transports --]
[-- Type: text/plain, Size: 1357 bytes --]

From: Ursula Braun <ursula.braun@de.ibm.com>

When adding HiperSockets transport to AF_IUCV Sockets, af_iucv either
depends on IUCV or QETH_L3 (or both). This patch introduces the
necessary changes for kernel configuration.

Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 net/iucv/Kconfig |   14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

--- a/net/iucv/Kconfig
+++ b/net/iucv/Kconfig
@@ -1,15 +1,15 @@
 config IUCV
-	tristate "IUCV support (S390 - z/VM only)"
-	depends on S390
+	def_tristate y if S390
+	prompt "IUCV support (S390 - z/VM only)"
 	help
 	  Select this option if you want to use inter-user communication
 	  under VM or VIF. If you run on z/VM, say "Y" to enable a fast
 	  communication link between VM guests.
 
 config AFIUCV
-	tristate "AF_IUCV support (S390 - z/VM only)"
-	depends on IUCV
+	def_tristate m if QETH_L3 || IUCV
+	prompt "AF_IUCV Socket support (S390 - z/VM and HiperSockets transport)"
 	help
-	  Select this option if you want to use inter-user communication under
-	  VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast
-	  communication link between VM guests.
+	  Select this option if you want to use AF_IUCV socket applications
+	  based on z/VM inter-user communication vehicle or based on
+	  HiperSockets.


^ permalink raw reply

* [patch 01/11] [PATCH] iucv: introduce loadable iucv interface
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-iucv-introduce-loadable-iucv-interface.patch --]
[-- Type: text/plain, Size: 3456 bytes --]

From: Frank Blaschka <frank.blaschka@de.ibm.com>

This patch adds a symbol to dynamically load iucv functions.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 include/net/iucv/iucv.h |   36 +++++++++++++++++++++++++++++++++++-
 net/iucv/iucv.c         |   23 +++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

--- a/include/net/iucv/iucv.h
+++ b/include/net/iucv/iucv.h
@@ -120,7 +120,7 @@ struct iucv_message {
 	u32 reply_size;
 	u8  rmmsg[8];
 	u8  flags;
-};
+} __packed;
 
 /*
  * struct iucv_handler
@@ -459,3 +459,37 @@ int __iucv_message_send(struct iucv_path
 int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
 			  u8 flags, u32 srccls, void *buffer, size_t size,
 			  void *answer, size_t asize, size_t *residual);
+
+struct iucv_interface {
+	int (*message_receive)(struct iucv_path *path, struct iucv_message *msg,
+		u8 flags, void *buffer, size_t size, size_t *residual);
+	int (*__message_receive)(struct iucv_path *path,
+		struct iucv_message *msg, u8 flags, void *buffer, size_t size,
+		size_t *residual);
+	int (*message_reply)(struct iucv_path *path, struct iucv_message *msg,
+		u8 flags, void *reply, size_t size);
+	int (*message_reject)(struct iucv_path *path, struct iucv_message *msg);
+	int (*message_send)(struct iucv_path *path, struct iucv_message *msg,
+		u8 flags, u32 srccls, void *buffer, size_t size);
+	int (*__message_send)(struct iucv_path *path, struct iucv_message *msg,
+		u8 flags, u32 srccls, void *buffer, size_t size);
+	int (*message_send2way)(struct iucv_path *path,
+		struct iucv_message *msg, u8 flags, u32 srccls, void *buffer,
+		size_t size, void *answer, size_t asize, size_t *residual);
+	int (*message_purge)(struct iucv_path *path, struct iucv_message *msg,
+		u32 srccls);
+	int (*path_accept)(struct iucv_path *path, struct iucv_handler *handler,
+		u8 userdata[16], void *private);
+	int (*path_connect)(struct iucv_path *path,
+		struct iucv_handler *handler,
+		u8 userid[8], u8 system[8], u8 userdata[16], void *private);
+	int (*path_quiesce)(struct iucv_path *path, u8 userdata[16]);
+	int (*path_resume)(struct iucv_path *path, u8 userdata[16]);
+	int (*path_sever)(struct iucv_path *path, u8 userdata[16]);
+	int (*iucv_register)(struct iucv_handler *handler, int smp);
+	void (*iucv_unregister)(struct iucv_handler *handler, int smp);
+	struct bus_type *bus;
+	struct device *root;
+};
+
+extern struct iucv_interface iucv_if;
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1974,6 +1974,27 @@ out:
 	return rc;
 }
 
+struct iucv_interface iucv_if = {
+	.message_receive = iucv_message_receive,
+	.__message_receive = __iucv_message_receive,
+	.message_reply = iucv_message_reply,
+	.message_reject = iucv_message_reject,
+	.message_send = iucv_message_send,
+	.__message_send = __iucv_message_send,
+	.message_send2way = iucv_message_send2way,
+	.message_purge = iucv_message_purge,
+	.path_accept = iucv_path_accept,
+	.path_connect = iucv_path_connect,
+	.path_quiesce = iucv_path_quiesce,
+	.path_resume = iucv_path_resume,
+	.path_sever = iucv_path_sever,
+	.iucv_register = iucv_register,
+	.iucv_unregister = iucv_unregister,
+	.bus = NULL,
+	.root = NULL,
+};
+EXPORT_SYMBOL(iucv_if);
+
 /**
  * iucv_init
  *
@@ -2037,6 +2058,8 @@ static int __init iucv_init(void)
 	rc = bus_register(&iucv_bus);
 	if (rc)
 		goto out_reboot;
+	iucv_if.root = iucv_root;
+	iucv_if.bus = &iucv_bus;
 	return 0;
 
 out_reboot:


^ permalink raw reply

* [patch 05/11] [PATCH] if_ether: add new Ethernet Protocol ID for af_iucv
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-af_iucv-new-protocol-id.patch --]
[-- Type: text/plain, Size: 823 bytes --]

From: Frank Blaschka <frank.blaschka@de.ibm.com>

Add a new ethertype for af_iucv over s/390 HiperSockets transport. Since
HiperSockets is not a real ethernet hw this is not an officially registered ID.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 include/linux/if_ether.h |    1 +
 1 file changed, 1 insertion(+)

--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -88,6 +88,7 @@
 #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_EDSA	0xDADA		/* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_AF_IUCV   0xFBFB		/* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
 
 /*
  *	Non DIX types. Won't clash for 1500 types.


^ permalink raw reply

* [patch 09/11] [PATCH] qeth: support forced signal adapter indications
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Einar Lueck
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117_qeth_siga_r.patch --]
[-- Type: text/plain, Size: 907 bytes --]

From: Einar Lueck <elelueck@de.ibm.com>

This patch ensures that signal adapter commands are issued if they are
indicated to be required.

Signed-off-by: Einar Lueck <elelueck@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 drivers/s390/net/qeth_core.h |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -231,7 +231,8 @@ static inline int qeth_is_ipa_enabled(st
 #define QETH_IN_BUF_COUNT_MAX 128
 #define QETH_MAX_BUFFER_ELEMENTS(card) ((card)->qdio.in_buf_size >> 12)
 #define QETH_IN_BUF_REQUEUE_THRESHOLD(card) \
-		((card)->qdio.in_buf_pool.buf_count / 2)
+		((card)->ssqd.qdioac1 & AC1_SIGA_INPUT_NEEDED ? 1 : \
+		 ((card)->qdio.in_buf_pool.buf_count / 2))
 
 /* buffers we have to be behind before we get a PCI */
 #define QETH_PCI_THRESHOLD_A(card) ((card)->qdio.in_buf_pool.buf_count+1)


^ permalink raw reply

* [patch 08/11] [PATCH] qdio: support forced signal adapter indications
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Einar Lueck, Jan Glauber
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117_qdio_siga_r.patch --]
[-- Type: text/plain, Size: 2398 bytes --]

From: Einar Lueck <elelueck@de.ibm.com>

This patch ensures that signal adapter commands are issued if they are 
indicated to be required.

Signed-off-by: Einar Lueck <elelueck@de.ibm.com>
Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 arch/s390/include/asm/qdio.h |   10 ++++++++++
 drivers/s390/cio/qdio.h      |    9 ---------
 drivers/s390/cio/qdio_main.c |    5 +----
 3 files changed, 11 insertions(+), 13 deletions(-)

--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -279,6 +279,16 @@ struct qdio_outbuf_state {
 
 #define CHSC_AC1_INITIATE_INPUTQ	0x80
 
+
+/* qdio adapter-characteristics-1 flag */
+#define AC1_SIGA_INPUT_NEEDED		0x40	/* process input queues */
+#define AC1_SIGA_OUTPUT_NEEDED		0x20	/* process output queues */
+#define AC1_SIGA_SYNC_NEEDED		0x10	/* ask hypervisor to sync */
+#define AC1_AUTOMATIC_SYNC_ON_THININT	0x08	/* set by hypervisor */
+#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI	0x04	/* set by hypervisor */
+#define AC1_SC_QEBSM_AVAILABLE		0x02	/* available for subchannel */
+#define AC1_SC_QEBSM_ENABLED		0x01	/* enabled for subchannel */
+
 #define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
 #define CHSC_AC2_DATA_DIV_ENABLED	0x0002
 
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -85,15 +85,6 @@ enum qdio_irq_states {
 #define CHSC_FLAG_QDIO_CAPABILITY	0x80
 #define CHSC_FLAG_VALIDITY		0x40
 
-/* qdio adapter-characteristics-1 flag */
-#define AC1_SIGA_INPUT_NEEDED		0x40	/* process input queues */
-#define AC1_SIGA_OUTPUT_NEEDED		0x20	/* process output queues */
-#define AC1_SIGA_SYNC_NEEDED		0x10	/* ask hypervisor to sync */
-#define AC1_AUTOMATIC_SYNC_ON_THININT	0x08	/* set by hypervisor */
-#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI	0x04	/* set by hypervisor */
-#define AC1_SC_QEBSM_AVAILABLE		0x02	/* available for subchannel */
-#define AC1_SC_QEBSM_ENABLED		0x01	/* enabled for subchannel */
-
 /* SIGA flags */
 #define QDIO_SIGA_WRITE		0x00
 #define QDIO_SIGA_READ		0x01
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -1577,12 +1577,9 @@ set:
 	used = atomic_add_return(count, &q->nr_buf_used) - count;
 	BUG_ON(used + count > QDIO_MAX_BUFFERS_PER_Q);
 
-	/* no need to signal as long as the adapter had free buffers */
-	if (used)
-		return 0;
-
 	if (need_siga_in(q))
 		return qdio_siga_input(q);
+
 	return 0;
 }
 


^ permalink raw reply

* [patch 03/11] [PATCH] af_iucv: use loadable iucv interface
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-af_iucv-use-loadable-iucv-interface.patch --]
[-- Type: text/plain, Size: 8680 bytes --]

From: Frank Blaschka <frank.blaschka@de.ibm.com>

For future af_iucv extensions the module should be able to run in LPAR
mode too. For this we use the new dynamic loading iucv interface.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 net/iucv/af_iucv.c |  119 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 74 insertions(+), 45 deletions(-)

--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -42,6 +42,8 @@ static struct proto iucv_proto = {
 	.obj_size	= sizeof(struct iucv_sock),
 };
 
+static struct iucv_interface *pr_iucv;
+
 /* special AF_IUCV IPRM messages */
 static const u8 iprm_shutdown[8] =
 	{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
@@ -165,7 +167,7 @@ static int afiucv_pm_freeze(struct devic
 		case IUCV_CLOSING:
 		case IUCV_CONNECTED:
 			if (iucv->path) {
-				err = iucv_path_sever(iucv->path, NULL);
+				err = pr_iucv->path_sever(iucv->path, NULL);
 				iucv_path_free(iucv->path);
 				iucv->path = NULL;
 			}
@@ -229,7 +231,7 @@ static const struct dev_pm_ops afiucv_pm
 static struct device_driver af_iucv_driver = {
 	.owner = THIS_MODULE,
 	.name = "afiucv",
-	.bus  = &iucv_bus,
+	.bus  = NULL,
 	.pm   = &afiucv_pm_ops,
 };
 
@@ -412,7 +414,7 @@ static void iucv_sock_close(struct sock 
 			low_nmcpy(user_data, iucv->src_name);
 			high_nmcpy(user_data, iucv->dst_name);
 			ASCEBC(user_data, sizeof(user_data));
-			iucv_path_sever(iucv->path, user_data);
+			pr_iucv->path_sever(iucv->path, user_data);
 			iucv_path_free(iucv->path);
 			iucv->path = NULL;
 		}
@@ -704,8 +706,9 @@ static int iucv_sock_connect(struct sock
 		err = -ENOMEM;
 		goto done;
 	}
-	err = iucv_path_connect(iucv->path, &af_iucv_handler,
-				sa->siucv_user_id, NULL, user_data, sk);
+	err = pr_iucv->path_connect(iucv->path, &af_iucv_handler,
+				    sa->siucv_user_id, NULL, user_data,
+				    sk);
 	if (err) {
 		iucv_path_free(iucv->path);
 		iucv->path = NULL;
@@ -738,7 +741,7 @@ static int iucv_sock_connect(struct sock
 	}
 
 	if (err) {
-		iucv_path_sever(iucv->path, NULL);
+		pr_iucv->path_sever(iucv->path, NULL);
 		iucv_path_free(iucv->path);
 		iucv->path = NULL;
 	}
@@ -871,7 +874,7 @@ static int iucv_send_iprm(struct iucv_pa
 
 	memcpy(prmdata, (void *) skb->data, skb->len);
 	prmdata[7] = 0xff - (u8) skb->len;
-	return iucv_message_send(path, msg, IUCV_IPRMDATA, 0,
+	return pr_iucv->message_send(path, msg, IUCV_IPRMDATA, 0,
 				 (void *) prmdata, 8);
 }
 
@@ -999,13 +1002,13 @@ static int iucv_sock_sendmsg(struct kioc
 		/* this error should never happen since the
 		 * IUCV_IPRMDATA path flag is set... sever path */
 		if (err == 0x15) {
-			iucv_path_sever(iucv->path, NULL);
+			pr_iucv->path_sever(iucv->path, NULL);
 			skb_unlink(skb, &iucv->send_skb_q);
 			err = -EPIPE;
 			goto fail;
 		}
 	} else
-		err = iucv_message_send(iucv->path, &txmsg, 0, 0,
+		err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0,
 					(void *) skb->data, skb->len);
 	if (err) {
 		if (err == 3) {
@@ -1095,8 +1098,9 @@ static void iucv_process_message(struct 
 			skb->len = 0;
 		}
 	} else {
-		rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA,
-					  skb->data, len, NULL);
+		rc = pr_iucv->message_receive(path, msg,
+					      msg->flags & IUCV_IPRMDATA,
+					      skb->data, len, NULL);
 		if (rc) {
 			kfree_skb(skb);
 			return;
@@ -1110,7 +1114,7 @@ static void iucv_process_message(struct 
 			kfree_skb(skb);
 			skb = NULL;
 			if (rc) {
-				iucv_path_sever(path, NULL);
+				pr_iucv->path_sever(path, NULL);
 				return;
 			}
 			skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
@@ -1327,8 +1331,8 @@ static int iucv_sock_shutdown(struct soc
 	if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
 		txmsg.class = 0;
 		txmsg.tag = 0;
-		err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0,
-					(void *) iprm_shutdown, 8);
+		err = pr_iucv->message_send(iucv->path, &txmsg, IUCV_IPRMDATA,
+					0, (void *) iprm_shutdown, 8);
 		if (err) {
 			switch (err) {
 			case 1:
@@ -1345,7 +1349,7 @@ static int iucv_sock_shutdown(struct soc
 	}
 
 	if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) {
-		err = iucv_path_quiesce(iucv_sk(sk)->path, NULL);
+		err = pr_iucv->path_quiesce(iucv->path, NULL);
 		if (err)
 			err = -ENOTCONN;
 
@@ -1372,7 +1376,7 @@ static int iucv_sock_release(struct sock
 
 	/* Unregister with IUCV base support */
 	if (iucv_sk(sk)->path) {
-		iucv_path_sever(iucv_sk(sk)->path, NULL);
+		pr_iucv->path_sever(iucv_sk(sk)->path, NULL);
 		iucv_path_free(iucv_sk(sk)->path);
 		iucv_sk(sk)->path = NULL;
 	}
@@ -1514,14 +1518,14 @@ static int iucv_callback_connreq(struct 
 	high_nmcpy(user_data, iucv->dst_name);
 	ASCEBC(user_data, sizeof(user_data));
 	if (sk->sk_state != IUCV_LISTEN) {
-		err = iucv_path_sever(path, user_data);
+		err = pr_iucv->path_sever(path, user_data);
 		iucv_path_free(path);
 		goto fail;
 	}
 
 	/* Check for backlog size */
 	if (sk_acceptq_is_full(sk)) {
-		err = iucv_path_sever(path, user_data);
+		err = pr_iucv->path_sever(path, user_data);
 		iucv_path_free(path);
 		goto fail;
 	}
@@ -1529,7 +1533,7 @@ static int iucv_callback_connreq(struct 
 	/* Create the new socket */
 	nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
 	if (!nsk) {
-		err = iucv_path_sever(path, user_data);
+		err = pr_iucv->path_sever(path, user_data);
 		iucv_path_free(path);
 		goto fail;
 	}
@@ -1553,9 +1557,9 @@ static int iucv_callback_connreq(struct 
 	/* set message limit for path based on msglimit of accepting socket */
 	niucv->msglimit = iucv->msglimit;
 	path->msglim = iucv->msglimit;
-	err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk);
+	err = pr_iucv->path_accept(path, &af_iucv_handler, nuser_data, nsk);
 	if (err) {
-		err = iucv_path_sever(path, user_data);
+		err = pr_iucv->path_sever(path, user_data);
 		iucv_path_free(path);
 		iucv_sock_kill(nsk);
 		goto fail;
@@ -1589,7 +1593,7 @@ static void iucv_callback_rx(struct iucv
 	int len;
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN) {
-		iucv_message_reject(path, msg);
+		pr_iucv->message_reject(path, msg);
 		return;
 	}
 
@@ -1718,6 +1722,41 @@ static const struct net_proto_family iuc
 	.create	= iucv_sock_create,
 };
 
+static int __init afiucv_iucv_init(void)
+{
+	int err;
+
+	err = pr_iucv->iucv_register(&af_iucv_handler, 0);
+	if (err)
+		goto out;
+	/* establish dummy device */
+	af_iucv_driver.bus = pr_iucv->bus;
+	err = driver_register(&af_iucv_driver);
+	if (err)
+		goto out_iucv;
+	af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!af_iucv_dev) {
+		err = -ENOMEM;
+		goto out_driver;
+	}
+	dev_set_name(af_iucv_dev, "af_iucv");
+	af_iucv_dev->bus = pr_iucv->bus;
+	af_iucv_dev->parent = pr_iucv->root;
+	af_iucv_dev->release = (void (*)(struct device *))kfree;
+	af_iucv_dev->driver = &af_iucv_driver;
+	err = device_register(af_iucv_dev);
+	if (err)
+		goto out_driver;
+	return 0;
+
+out_driver:
+	driver_unregister(&af_iucv_driver);
+out_iucv:
+	pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+out:
+	return err;
+}
+
 static int __init afiucv_init(void)
 {
 	int err;
@@ -1735,44 +1774,33 @@ static int __init afiucv_init(void)
 		goto out;
 	}
 
-	err = iucv_register(&af_iucv_handler, 0);
-	if (err)
+	pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
+	if (!pr_iucv) {
+		printk(KERN_WARNING "iucv_if lookup failed\n");
+		err = -EPROTONOSUPPORT;
 		goto out;
+	}
+
 	err = proto_register(&iucv_proto, 0);
 	if (err)
-		goto out_iucv;
+		goto out;
 	err = sock_register(&iucv_sock_family_ops);
 	if (err)
 		goto out_proto;
-	/* establish dummy device */
-	err = driver_register(&af_iucv_driver);
+
+	err = afiucv_iucv_init();
 	if (err)
 		goto out_sock;
-	af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
-	if (!af_iucv_dev) {
-		err = -ENOMEM;
-		goto out_driver;
-	}
-	dev_set_name(af_iucv_dev, "af_iucv");
-	af_iucv_dev->bus = &iucv_bus;
-	af_iucv_dev->parent = iucv_root;
-	af_iucv_dev->release = (void (*)(struct device *))kfree;
-	af_iucv_dev->driver = &af_iucv_driver;
-	err = device_register(af_iucv_dev);
-	if (err)
-		goto out_driver;
 
 	return 0;
 
-out_driver:
-	driver_unregister(&af_iucv_driver);
 out_sock:
 	sock_unregister(PF_IUCV);
 out_proto:
 	proto_unregister(&iucv_proto);
-out_iucv:
-	iucv_unregister(&af_iucv_handler, 0);
 out:
+	if (pr_iucv)
+		symbol_put(iucv_if);
 	return err;
 }
 
@@ -1780,9 +1808,10 @@ static void __exit afiucv_exit(void)
 {
 	device_unregister(af_iucv_dev);
 	driver_unregister(&af_iucv_driver);
+	pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+	symbol_put(iucv_if);
 	sock_unregister(PF_IUCV);
 	proto_unregister(&iucv_proto);
-	iucv_unregister(&af_iucv_handler, 0);
 }
 
 module_init(afiucv_init);


^ permalink raw reply

* [patch 10/11] [PATCH] qeth: exploit asynchronous delivery of storage blocks
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Einar Lueck
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117_qeth_cq.patch --]
[-- Type: text/plain, Size: 28255 bytes --]

From: Einar Lueck <elelueck@de.ibm.com>

This patch exploits the QDIO support for asynchronous delivery of storage 
blocks for Hipersockets. The exploitation is not configured per default and
may be enabled via the function qeth_configure_cq.

Signed-off-by: Einar Lueck <elelueck@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 drivers/s390/net/qeth_core.h      |   28 +-
 drivers/s390/net/qeth_core_main.c |  495 ++++++++++++++++++++++++++++++++++----
 2 files changed, 480 insertions(+), 43 deletions(-)

--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -110,6 +110,10 @@ struct qeth_perf_stats {
 
 	unsigned int sc_dp_p;
 	unsigned int sc_p_dp;
+	/* qdio_cq_handler: number of times called, time spent in */
+	__u64 cq_start_time;
+	unsigned int cq_cnt;
+	unsigned int cq_time;
 	/* qdio_input_handler: number of times called, time spent in */
 	__u64 inbound_start_time;
 	unsigned int inbound_cnt;
@@ -376,6 +380,11 @@ enum qeth_qdio_buffer_states {
 	 * outbound: filled by driver; owned by hardware in order to be sent
 	 */
 	QETH_QDIO_BUF_PRIMED,
+	/*
+	 * inbound: not applicable
+	 * outbound: handled via transfer pending / completion queue
+	 */
+	QETH_QDIO_BUF_HANDLED_DELAYED,
 };
 
 enum qeth_qdio_info_states {
@@ -413,8 +422,11 @@ struct qeth_qdio_out_buffer {
 	atomic_t state;
 	int next_element_to_fill;
 	struct sk_buff_head skb_list;
-	struct list_head ctx_list;
 	int is_header[16];
+
+	struct qaob *aob;
+	struct qeth_qdio_out_q *q;
+	struct qeth_qdio_out_buffer *next_pending;
 };
 
 struct qeth_card;
@@ -427,7 +439,8 @@ enum qeth_out_q_states {
 
 struct qeth_qdio_out_q {
 	struct qdio_buffer qdio_bufs[QDIO_MAX_BUFFERS_PER_Q];
-	struct qeth_qdio_out_buffer bufs[QDIO_MAX_BUFFERS_PER_Q];
+	struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q];
+	struct qdio_outbuf_state *bufstates; /* convenience pointer */
 	int queue_no;
 	struct qeth_card *card;
 	atomic_t state;
@@ -448,7 +461,9 @@ struct qeth_qdio_out_q {
 struct qeth_qdio_info {
 	atomic_t state;
 	/* input */
+	int no_in_queues;
 	struct qeth_qdio_q *in_q;
+	struct qeth_qdio_q *c_q;
 	struct qeth_qdio_buffer_pool in_buf_pool;
 	struct qeth_qdio_buffer_pool init_pool;
 	int in_buf_size;
@@ -456,6 +471,7 @@ struct qeth_qdio_info {
 	/* output */
 	int no_out_queues;
 	struct qeth_qdio_out_q **out_qs;
+	struct qdio_outbuf_state *out_bufstates;
 
 	/* priority queueing */
 	int do_prio_queueing;
@@ -527,6 +543,12 @@ enum qeth_cmd_buffer_state {
 	BUF_STATE_PROCESSED,
 };
 
+enum qeth_cq {
+	QETH_CQ_DISABLED = 0,
+	QETH_CQ_ENABLED = 1,
+	QETH_CQ_NOTAVAILABLE = 2,
+};
+
 struct qeth_ipato {
 	int enabled;
 	int invert4;
@@ -651,6 +673,7 @@ struct qeth_card_options {
 	int rx_sg_cb;
 	enum qeth_ipa_isolation_modes isolation;
 	int sniffer;
+	enum qeth_cq cq;
 };
 
 /*
@@ -888,6 +911,7 @@ void qeth_dbf_longtext(debug_info_t *id,
 int qeth_core_ethtool_get_settings(struct net_device *, struct ethtool_cmd *);
 int qeth_set_access_ctrl_online(struct qeth_card *card);
 int qeth_hdr_chk_and_bounce(struct sk_buff *, int);
+int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
 int qeth_query_ipassists(struct qeth_card *, enum qeth_prot_versions prot);
 
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -44,6 +44,7 @@ struct qeth_card_list_struct qeth_core_c
 EXPORT_SYMBOL_GPL(qeth_core_card_list);
 struct kmem_cache *qeth_core_header_cache;
 EXPORT_SYMBOL_GPL(qeth_core_header_cache);
+static struct kmem_cache *qeth_qdio_outbuf_cache;
 
 static struct device *qeth_core_root_dev;
 static unsigned int known_devices[][6] = QETH_MODELLIST_ARRAY;
@@ -56,6 +57,10 @@ static struct qeth_cmd_buffer *qeth_get_
 static void qeth_setup_ccw(struct qeth_channel *, unsigned char *, __u32);
 static void qeth_free_buffer_pool(struct qeth_card *);
 static int qeth_qdio_establish(struct qeth_card *);
+static void qeth_free_qdio_buffers(struct qeth_card *);
+static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
+		struct qeth_qdio_out_buffer *buf,
+		enum qeth_qdio_buffer_states newbufstate);
 
 
 static inline const char *qeth_get_cardname(struct qeth_card *card)
@@ -239,6 +244,150 @@ int qeth_realloc_buffer_pool(struct qeth
 }
 EXPORT_SYMBOL_GPL(qeth_realloc_buffer_pool);
 
+static inline int qeth_cq_init(struct qeth_card *card)
+{
+	int rc;
+
+	if (card->options.cq == QETH_CQ_ENABLED) {
+		QETH_DBF_TEXT(SETUP, 2, "cqinit");
+		memset(card->qdio.c_q->qdio_bufs, 0,
+		       QDIO_MAX_BUFFERS_PER_Q * sizeof(struct qdio_buffer));
+		card->qdio.c_q->next_buf_to_init = 127;
+		rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT,
+			     card->qdio.no_in_queues - 1, 0,
+			     127);
+		if (rc) {
+			QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc);
+			goto out;
+		}
+	}
+	rc = 0;
+out:
+	return rc;
+}
+
+static inline int qeth_alloc_cq(struct qeth_card *card)
+{
+	int rc;
+
+	if (card->options.cq == QETH_CQ_ENABLED) {
+		int i;
+		struct qdio_outbuf_state *outbuf_states;
+
+		QETH_DBF_TEXT(SETUP, 2, "cqon");
+		card->qdio.c_q = kzalloc(sizeof(struct qeth_qdio_q),
+					 GFP_KERNEL);
+		if (!card->qdio.c_q) {
+			rc = -1;
+			goto kmsg_out;
+		}
+		QETH_DBF_HEX(SETUP, 2, &card->qdio.c_q, sizeof(void *));
+
+		for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
+			card->qdio.c_q->bufs[i].buffer =
+				&card->qdio.c_q->qdio_bufs[i];
+		}
+
+		card->qdio.no_in_queues = 2;
+
+		card->qdio.out_bufstates = (struct qdio_outbuf_state *)
+			kzalloc(card->qdio.no_out_queues *
+				QDIO_MAX_BUFFERS_PER_Q *
+				sizeof(struct qdio_outbuf_state), GFP_KERNEL);
+		outbuf_states = card->qdio.out_bufstates;
+		if (outbuf_states == NULL) {
+			rc = -1;
+			goto free_cq_out;
+		}
+		for (i = 0; i < card->qdio.no_out_queues; ++i) {
+			card->qdio.out_qs[i]->bufstates = outbuf_states;
+			outbuf_states += QDIO_MAX_BUFFERS_PER_Q;
+		}
+	} else {
+		QETH_DBF_TEXT(SETUP, 2, "nocq");
+		card->qdio.c_q = NULL;
+		card->qdio.no_in_queues = 1;
+	}
+	QETH_DBF_TEXT_(SETUP, 2, "iqc%d", card->qdio.no_in_queues);
+	rc = 0;
+out:
+	return rc;
+free_cq_out:
+	kfree(card->qdio.c_q);
+	card->qdio.c_q = NULL;
+kmsg_out:
+	dev_err(&card->gdev->dev, "Failed to create completion queue\n");
+	goto out;
+}
+
+static inline void qeth_free_cq(struct qeth_card *card)
+{
+	if (card->qdio.c_q) {
+		--card->qdio.no_in_queues;
+		kfree(card->qdio.c_q);
+		card->qdio.c_q = NULL;
+	}
+	kfree(card->qdio.out_bufstates);
+	card->qdio.out_bufstates = NULL;
+}
+
+static inline void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q,
+	int bidx, int forced_cleanup)
+{
+	if (q->bufs[bidx]->next_pending != NULL) {
+		struct qeth_qdio_out_buffer *head = q->bufs[bidx];
+		struct qeth_qdio_out_buffer *c = q->bufs[bidx]->next_pending;
+
+		while (c) {
+			if (forced_cleanup ||
+			    atomic_read(&c->state) ==
+			      QETH_QDIO_BUF_HANDLED_DELAYED) {
+				struct qeth_qdio_out_buffer *f = c;
+				QETH_CARD_TEXT(f->q->card, 5, "fp");
+				QETH_CARD_TEXT_(f->q->card, 5, "%lx", (long) f);
+				c = f->next_pending;
+				BUG_ON(head->next_pending != f);
+				head->next_pending = c;
+				kmem_cache_free(qeth_qdio_outbuf_cache, f);
+			} else {
+				head = c;
+				c = c->next_pending;
+			}
+
+		}
+	}
+}
+
+
+static inline void qeth_qdio_handle_aob(struct qeth_card *card,
+		unsigned long phys_aob_addr) {
+	struct qaob *aob;
+	struct qeth_qdio_out_buffer *buffer;
+
+	aob = (struct qaob *) phys_to_virt(phys_aob_addr);
+	QETH_CARD_TEXT(card, 5, "haob");
+	QETH_CARD_TEXT_(card, 5, "%lx", phys_aob_addr);
+	buffer = (struct qeth_qdio_out_buffer *) aob->user1;
+	QETH_CARD_TEXT_(card, 5, "%lx", aob->user1);
+
+	BUG_ON(buffer == NULL);
+
+	buffer->aob = NULL;
+	qeth_clear_output_buffer(buffer->q, buffer,
+				QETH_QDIO_BUF_HANDLED_DELAYED);
+	/* from here on: do not touch buffer anymore */
+	qdio_release_aob(aob);
+}
+
+static inline int qeth_is_cq(struct qeth_card *card, unsigned int queue)
+{
+	return card->options.cq == QETH_CQ_ENABLED &&
+	    card->qdio.c_q != NULL &&
+	    queue != 0 &&
+	    queue == card->qdio.no_in_queues - 1;
+}
+
+
 static int qeth_issue_next_read(struct qeth_card *card)
 {
 	int rc;
@@ -681,6 +830,7 @@ EXPORT_SYMBOL_GPL(qeth_do_run_thread);
 void qeth_schedule_recovery(struct qeth_card *card)
 {
 	QETH_CARD_TEXT(card, 2, "startrec");
+	WARN_ON(1);
 	if (qeth_set_thread_start_bit(card, QETH_RECOVER_THREAD) == 0)
 		schedule_work(&card->kernel_thread_starter);
 }
@@ -884,7 +1034,8 @@ out:
 }
 
 static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
-		struct qeth_qdio_out_buffer *buf)
+		struct qeth_qdio_out_buffer *buf,
+		enum qeth_qdio_buffer_states newbufstate)
 {
 	int i;
 	struct sk_buff *skb;
@@ -912,21 +1063,36 @@ static void qeth_clear_output_buffer(str
 	buf->buffer->element[15].eflags = 0;
 	buf->buffer->element[15].sflags = 0;
 	buf->next_element_to_fill = 0;
-	atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY);
+	atomic_set(&buf->state, newbufstate);
+}
+
+static void qeth_clear_outq_buffers(struct qeth_qdio_out_q *q, int free)
+{
+	int j;
+
+	for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
+		if (!q->bufs[j])
+			continue;
+		qeth_cleanup_handled_pending(q, j, free);
+		qeth_clear_output_buffer(q, q->bufs[j], QETH_QDIO_BUF_EMPTY);
+		if (free) {
+			kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]);
+			q->bufs[j] = NULL;
+		}
+	}
 }
 
 void qeth_clear_qdio_buffers(struct qeth_card *card)
 {
-	int i, j;
+	int i;
 
 	QETH_CARD_TEXT(card, 2, "clearqdbf");
 	/* clear outbound buffers to free skbs */
-	for (i = 0; i < card->qdio.no_out_queues; ++i)
+	for (i = 0; i < card->qdio.no_out_queues; ++i) {
 		if (card->qdio.out_qs[i]) {
-			for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j)
-				qeth_clear_output_buffer(card->qdio.out_qs[i],
-						&card->qdio.out_qs[i]->bufs[j]);
+			qeth_clear_outq_buffers(card->qdio.out_qs[i], 0);
 		}
+	}
 }
 EXPORT_SYMBOL_GPL(qeth_clear_qdio_buffers);
 
@@ -945,11 +1111,14 @@ static void qeth_free_buffer_pool(struct
 
 static void qeth_free_qdio_buffers(struct qeth_card *card)
 {
-	int i, j;
+	int i;
 
 	if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) ==
 		QETH_QDIO_UNINITIALIZED)
 		return;
+
+	qeth_free_cq(card);
+
 	kfree(card->qdio.in_q);
 	card->qdio.in_q = NULL;
 	/* inbound buffer pool */
@@ -957,9 +1126,7 @@ static void qeth_free_qdio_buffers(struc
 	/* free outbound qdio_qs */
 	if (card->qdio.out_qs) {
 		for (i = 0; i < card->qdio.no_out_queues; ++i) {
-			for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j)
-				qeth_clear_output_buffer(card->qdio.out_qs[i],
-						&card->qdio.out_qs[i]->bufs[j]);
+			qeth_clear_outq_buffers(card->qdio.out_qs[i], 1);
 			kfree(card->qdio.out_qs[i]);
 		}
 		kfree(card->qdio.out_qs);
@@ -1051,6 +1218,7 @@ static void qeth_set_intial_options(stru
 	card->options.performance_stats = 0;
 	card->options.rx_sg_cb = QETH_RX_SG_CB;
 	card->options.isolation = ISOLATION_MODE_NONE;
+	card->options.cq = QETH_CQ_DISABLED;
 }
 
 static int qeth_do_start_thread(struct qeth_card *card, unsigned long thread)
@@ -1180,6 +1348,7 @@ static int qeth_determine_card_type(stru
 			card->info.type = known_devices[i][QETH_DEV_MODEL_IND];
 			card->qdio.no_out_queues =
 				known_devices[i][QETH_QUEUE_NO_IND];
+			card->qdio.no_in_queues = 1;
 			card->info.is_multicast_different =
 				known_devices[i][QETH_MULTICAST_IND];
 			qeth_get_channel_path_desc(card);
@@ -2027,6 +2196,37 @@ static int qeth_ulp_setup(struct qeth_ca
 	return rc;
 }
 
+static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
+{
+	int rc;
+	struct qeth_qdio_out_buffer *newbuf;
+
+	rc = 0;
+	newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, GFP_ATOMIC);
+	if (!newbuf) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	newbuf->buffer = &q->qdio_bufs[bidx];
+	skb_queue_head_init(&newbuf->skb_list);
+	lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key);
+	newbuf->q = q;
+	newbuf->aob = NULL;
+	newbuf->next_pending = q->bufs[bidx];
+	atomic_set(&newbuf->state, QETH_QDIO_BUF_EMPTY);
+	q->bufs[bidx] = newbuf;
+	if (q->bufstates) {
+		q->bufstates[bidx].user = newbuf;
+		QETH_CARD_TEXT_(q->card, 2, "nbs%d", bidx);
+		QETH_CARD_TEXT_(q->card, 2, "%lx", (long) newbuf);
+		QETH_CARD_TEXT_(q->card, 2, "%lx",
+				(long) newbuf->next_pending);
+	}
+out:
+	return rc;
+}
+
+
 static int qeth_alloc_qdio_buffers(struct qeth_card *card)
 {
 	int i, j;
@@ -2038,7 +2238,7 @@ static int qeth_alloc_qdio_buffers(struc
 		return 0;
 
 	card->qdio.in_q = kmalloc(sizeof(struct qeth_qdio_q),
-				  GFP_KERNEL);
+				   GFP_KERNEL);
 	if (!card->qdio.in_q)
 		goto out_nomem;
 	QETH_DBF_TEXT(SETUP, 2, "inq");
@@ -2051,6 +2251,7 @@ static int qeth_alloc_qdio_buffers(struc
 	/* inbound buffer pool */
 	if (qeth_alloc_buffer_pool(card))
 		goto out_freeinq;
+
 	/* outbound */
 	card->qdio.out_qs =
 		kmalloc(card->qdio.no_out_queues *
@@ -2068,21 +2269,30 @@ static int qeth_alloc_qdio_buffers(struc
 		card->qdio.out_qs[i]->queue_no = i;
 		/* give outbound qeth_qdio_buffers their qdio_buffers */
 		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
-			card->qdio.out_qs[i]->bufs[j].buffer =
-				&card->qdio.out_qs[i]->qdio_bufs[j];
-			skb_queue_head_init(&card->qdio.out_qs[i]->bufs[j].
-					    skb_list);
-			lockdep_set_class(
-				&card->qdio.out_qs[i]->bufs[j].skb_list.lock,
-				&qdio_out_skb_queue_key);
-			INIT_LIST_HEAD(&card->qdio.out_qs[i]->bufs[j].ctx_list);
+			BUG_ON(card->qdio.out_qs[i]->bufs[j] != NULL);
+			if (qeth_init_qdio_out_buf(card->qdio.out_qs[i], j))
+				goto out_freeoutqbufs;
 		}
 	}
+
+	/* completion */
+	if (qeth_alloc_cq(card))
+		goto out_freeoutq;
+
 	return 0;
 
+out_freeoutqbufs:
+	while (j > 0) {
+		--j;
+		kmem_cache_free(qeth_qdio_outbuf_cache,
+				card->qdio.out_qs[i]->bufs[j]);
+		card->qdio.out_qs[i]->bufs[j] = NULL;
+	}
 out_freeoutq:
-	while (i > 0)
+	while (i > 0) {
 		kfree(card->qdio.out_qs[--i]);
+		qeth_clear_outq_buffers(card->qdio.out_qs[i], 1);
+	}
 	kfree(card->qdio.out_qs);
 	card->qdio.out_qs = NULL;
 out_freepool:
@@ -2399,13 +2609,21 @@ int qeth_init_qdio_queues(struct qeth_ca
 		QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc);
 		return rc;
 	}
+
+	/* completion */
+	rc = qeth_cq_init(card);
+	if (rc) {
+		return rc;
+	}
+
 	/* outbound queue */
 	for (i = 0; i < card->qdio.no_out_queues; ++i) {
 		memset(card->qdio.out_qs[i]->qdio_bufs, 0,
 		       QDIO_MAX_BUFFERS_PER_Q * sizeof(struct qdio_buffer));
 		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
 			qeth_clear_output_buffer(card->qdio.out_qs[i],
-					&card->qdio.out_qs[i]->bufs[j]);
+					card->qdio.out_qs[i]->bufs[j],
+					QETH_QDIO_BUF_EMPTY);
 		}
 		card->qdio.out_qs[i]->card = card;
 		card->qdio.out_qs[i]->next_buf_to_fill = 0;
@@ -2787,8 +3005,6 @@ void qeth_queue_input_buffer(struct qeth
 				qeth_get_micros() -
 				card->perf_stats.inbound_do_qdio_start_time;
 		if (rc) {
-			dev_warn(&card->gdev->dev,
-				"QDIO reported an error, rc=%i\n", rc);
 			QETH_CARD_TEXT(card, 2, "qinberr");
 		}
 		queue->next_buf_to_init = (queue->next_buf_to_init + count) %
@@ -2862,12 +3078,12 @@ static int qeth_switch_to_nonpacking_if_
 				queue->card->perf_stats.sc_p_dp++;
 			queue->do_pack = 0;
 			/* flush packing buffers */
-			buffer = &queue->bufs[queue->next_buf_to_fill];
+			buffer = queue->bufs[queue->next_buf_to_fill];
 			if ((atomic_read(&buffer->state) ==
 						QETH_QDIO_BUF_EMPTY) &&
 			    (buffer->next_element_to_fill > 0)) {
 				atomic_set(&buffer->state,
-						QETH_QDIO_BUF_PRIMED);
+					   QETH_QDIO_BUF_PRIMED);
 				flush_count++;
 				queue->next_buf_to_fill =
 					(queue->next_buf_to_fill + 1) %
@@ -2878,6 +3094,7 @@ static int qeth_switch_to_nonpacking_if_
 	return flush_count;
 }
 
+
 /*
  * Called to flush a packing buffer if no more pci flags are on the queue.
  * Checks if there is a packing buffer and prepares it to be flushed.
@@ -2887,7 +3104,7 @@ static int qeth_flush_buffers_on_no_pci(
 {
 	struct qeth_qdio_out_buffer *buffer;
 
-	buffer = &queue->bufs[queue->next_buf_to_fill];
+	buffer = queue->bufs[queue->next_buf_to_fill];
 	if ((atomic_read(&buffer->state) == QETH_QDIO_BUF_EMPTY) &&
 	   (buffer->next_element_to_fill > 0)) {
 		/* it's a packing buffer */
@@ -2908,10 +3125,14 @@ static void qeth_flush_buffers(struct qe
 	unsigned int qdio_flags;
 
 	for (i = index; i < index + count; ++i) {
-		buf = &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q];
+		int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
+		buf = queue->bufs[bidx];
 		buf->buffer->element[buf->next_element_to_fill - 1].eflags |=
 				SBAL_EFLAGS_LAST_ENTRY;
 
+		if (queue->bufstates)
+			queue->bufstates[bidx].user = buf;
+
 		if (queue->card->info.type == QETH_CARD_TYPE_IQD)
 			continue;
 
@@ -2963,6 +3184,9 @@ static void qeth_flush_buffers(struct qe
 		if (rc == QDIO_ERROR_SIGA_TARGET)
 			return;
 		QETH_CARD_TEXT(queue->card, 2, "flushbuf");
+		QETH_CARD_TEXT_(queue->card, 2, " q%d", queue->queue_no);
+		QETH_CARD_TEXT_(queue->card, 2, " idx%d", index);
+		QETH_CARD_TEXT_(queue->card, 2, " c%d", count);
 		QETH_CARD_TEXT_(queue->card, 2, " err%d", rc);
 
 		/* this must not happen under normal circumstances. if it
@@ -3024,14 +3248,120 @@ void qeth_qdio_start_poll(struct ccw_dev
 }
 EXPORT_SYMBOL_GPL(qeth_qdio_start_poll);
 
+int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq)
+{
+	int rc;
+
+	if (card->options.cq ==  QETH_CQ_NOTAVAILABLE) {
+		rc = -1;
+		goto out;
+	} else {
+		if (card->options.cq == cq) {
+			rc = 0;
+			goto out;
+		}
+
+		if (card->state != CARD_STATE_DOWN &&
+		    card->state != CARD_STATE_RECOVER) {
+			rc = -1;
+			goto out;
+		}
+
+		qeth_free_qdio_buffers(card);
+		card->options.cq = cq;
+		rc = 0;
+	}
+out:
+	return rc;
+
+}
+EXPORT_SYMBOL_GPL(qeth_configure_cq);
+
+
+static void qeth_qdio_cq_handler(struct qeth_card *card,
+		unsigned int qdio_err,
+		unsigned int queue, int first_element, int count) {
+	struct qeth_qdio_q *cq = card->qdio.c_q;
+	int i;
+	int rc;
+
+	if (!qeth_is_cq(card, queue))
+		goto out;
+
+	QETH_CARD_TEXT_(card, 5, "qcqhe%d", first_element);
+	QETH_CARD_TEXT_(card, 5, "qcqhc%d", count);
+	QETH_CARD_TEXT_(card, 5, "qcqherr%d", qdio_err);
+
+	if (qdio_err) {
+		netif_stop_queue(card->dev);
+		qeth_schedule_recovery(card);
+		goto out;
+	}
+
+	if (card->options.performance_stats) {
+		card->perf_stats.cq_cnt++;
+		card->perf_stats.cq_start_time = qeth_get_micros();
+	}
+
+	for (i = first_element; i < first_element + count; ++i) {
+		int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
+		struct qdio_buffer *buffer = &cq->qdio_bufs[bidx];
+		int e;
+
+		e = 0;
+		while (buffer->element[e].addr) {
+			unsigned long phys_aob_addr;
+
+			phys_aob_addr = (unsigned long) buffer->element[e].addr;
+			qeth_qdio_handle_aob(card, phys_aob_addr);
+			buffer->element[e].addr = NULL;
+			buffer->element[e].eflags = 0;
+			buffer->element[e].sflags = 0;
+			buffer->element[e].length = 0;
+
+			++e;
+		}
+
+		buffer->element[15].eflags = 0;
+		buffer->element[15].sflags = 0;
+	}
+	rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, queue,
+		    card->qdio.c_q->next_buf_to_init,
+		    count);
+	if (rc) {
+		dev_warn(&card->gdev->dev,
+			"QDIO reported an error, rc=%i\n", rc);
+		QETH_CARD_TEXT(card, 2, "qcqherr");
+	}
+	card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init
+				   + count) % QDIO_MAX_BUFFERS_PER_Q;
+
+	netif_wake_queue(card->dev);
+
+	if (card->options.performance_stats) {
+		int delta_t = qeth_get_micros();
+		delta_t -= card->perf_stats.cq_start_time;
+		card->perf_stats.cq_time += delta_t;
+	}
+out:
+	return;
+}
+
 void qeth_qdio_input_handler(struct ccw_device *ccwdev, unsigned int qdio_err,
-		unsigned int queue, int first_element, int count,
+		unsigned int queue, int first_elem, int count,
 		unsigned long card_ptr)
 {
 	struct qeth_card *card = (struct qeth_card *)card_ptr;
 
-	if (qdio_err)
+	QETH_CARD_TEXT_(card, 2, "qihq%d", queue);
+	QETH_CARD_TEXT_(card, 2, "qiec%d", qdio_err);
+
+	if (qeth_is_cq(card, queue))
+		qeth_qdio_cq_handler(card, qdio_err, queue, first_elem, count);
+	else if (qdio_err)
 		qeth_schedule_recovery(card);
+
+
 }
 EXPORT_SYMBOL_GPL(qeth_qdio_input_handler);
 
@@ -3057,9 +3387,26 @@ void qeth_qdio_output_handler(struct ccw
 			qeth_get_micros();
 	}
 	for (i = first_element; i < (first_element + count); ++i) {
-		buffer = &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q];
+		int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
+		buffer = queue->bufs[bidx];
 		qeth_handle_send_error(card, buffer, qdio_error);
-		qeth_clear_output_buffer(queue, buffer);
+
+		if (queue->bufstates &&
+		    (queue->bufstates[bidx].flags &
+		     QDIO_OUTBUF_STATE_FLAG_PENDING) != 0) {
+			buffer->aob = queue->bufstates[bidx].aob;
+			QETH_CARD_TEXT_(queue->card, 5, "pel%d", bidx);
+			QETH_CARD_TEXT_(queue->card, 5, "aob");
+			QETH_CARD_TEXT_(queue->card, 5, "%lx",
+					virt_to_phys(buffer->aob));
+			BUG_ON(bidx < 0 || bidx >= QDIO_MAX_BUFFERS_PER_Q);
+			if (qeth_init_qdio_out_buf(queue, bidx))
+				qeth_schedule_recovery(card);
+		} else {
+			qeth_clear_output_buffer(queue, buffer,
+						QETH_QDIO_BUF_EMPTY);
+		}
+		qeth_cleanup_handled_pending(queue, bidx, 0);
 	}
 	atomic_sub(count, &queue->used_buffers);
 	/* check if we need to do something on this outbound queue */
@@ -3291,7 +3638,7 @@ int qeth_do_send_packet_fast(struct qeth
 			      QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
 	/* ... now we've got the queue */
 	index = queue->next_buf_to_fill;
-	buffer = &queue->bufs[queue->next_buf_to_fill];
+	buffer = queue->bufs[queue->next_buf_to_fill];
 	/*
 	 * check if buffer is empty to make sure that we do not 'overtake'
 	 * ourselves and try to fill a buffer that is already primed
@@ -3325,7 +3672,7 @@ int qeth_do_send_packet(struct qeth_card
 	while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED,
 			      QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
 	start_index = queue->next_buf_to_fill;
-	buffer = &queue->bufs[queue->next_buf_to_fill];
+	buffer = queue->bufs[queue->next_buf_to_fill];
 	/*
 	 * check if buffer is empty to make sure that we do not 'overtake'
 	 * ourselves and try to fill a buffer that is already primed
@@ -3347,7 +3694,7 @@ int qeth_do_send_packet(struct qeth_card
 			queue->next_buf_to_fill =
 				(queue->next_buf_to_fill + 1) %
 				QDIO_MAX_BUFFERS_PER_Q;
-			buffer = &queue->bufs[queue->next_buf_to_fill];
+			buffer = queue->bufs[queue->next_buf_to_fill];
 			/* we did a step forward, so check buffer state
 			 * again */
 			if (atomic_read(&buffer->state) !=
@@ -3925,6 +4272,20 @@ static void qeth_determine_capabilities(
 	if (rc)
 		QETH_DBF_TEXT_(SETUP, 2, "6err%d", rc);
 
+	QETH_DBF_TEXT_(SETUP, 2, "qfmt%d", card->ssqd.qfmt);
+	QETH_DBF_TEXT_(SETUP, 2, "%d", card->ssqd.qdioac1);
+	QETH_DBF_TEXT_(SETUP, 2, "%d", card->ssqd.qdioac3);
+	QETH_DBF_TEXT_(SETUP, 2, "icnt%d", card->ssqd.icnt);
+	if (!((card->ssqd.qfmt != QDIO_IQDIO_QFMT) ||
+	    ((card->ssqd.qdioac1 & CHSC_AC1_INITIATE_INPUTQ) == 0) ||
+	    ((card->ssqd.qdioac3 & CHSC_AC3_FORMAT2_CQ_AVAILABLE) == 0))) {
+		dev_info(&card->gdev->dev,
+			"Completion Queueing supported\n");
+	} else {
+		card->options.cq = QETH_CQ_NOTAVAILABLE;
+	}
+
+
 out_offline:
 	if (ddev_offline == 1)
 		ccw_device_set_offline(ddev);
@@ -3932,6 +4293,24 @@ out:
 	return;
 }
 
+static inline void qeth_qdio_establish_cq(struct qeth_card *card,
+	struct qdio_buffer **in_sbal_ptrs,
+	void (**queue_start_poll) (struct ccw_device *, int, unsigned long)) {
+	int i;
+
+	if (card->options.cq == QETH_CQ_ENABLED) {
+		int offset = QDIO_MAX_BUFFERS_PER_Q *
+			     (card->qdio.no_in_queues - 1);
+		i = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1);
+		for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
+			in_sbal_ptrs[offset + i] = (struct qdio_buffer *)
+				virt_to_phys(card->qdio.c_q->bufs[i].buffer);
+		}
+
+		queue_start_poll[card->qdio.no_in_queues - 1] = NULL;
+	}
+}
+
 static int qeth_qdio_establish(struct qeth_card *card)
 {
 	struct qdio_initialize init_data;
@@ -3954,22 +4333,28 @@ static int qeth_qdio_establish(struct qe
 	qeth_create_qib_param_field(card, qib_param_field);
 	qeth_create_qib_param_field_blkt(card, qib_param_field);
 
-	in_sbal_ptrs = kmalloc(QDIO_MAX_BUFFERS_PER_Q * sizeof(void *),
+	in_sbal_ptrs = kmalloc(card->qdio.no_in_queues *
+			       QDIO_MAX_BUFFERS_PER_Q * sizeof(void *),
 			       GFP_KERNEL);
 	if (!in_sbal_ptrs) {
 		rc = -ENOMEM;
 		goto out_free_qib_param;
 	}
-	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i)
+	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
 		in_sbal_ptrs[i] = (struct qdio_buffer *)
 			virt_to_phys(card->qdio.in_q->bufs[i].buffer);
+	}
 
-	queue_start_poll = kmalloc(sizeof(void *) * 1, GFP_KERNEL);
+	queue_start_poll = kzalloc(sizeof(void *) * card->qdio.no_in_queues,
+				   GFP_KERNEL);
 	if (!queue_start_poll) {
 		rc = -ENOMEM;
 		goto out_free_in_sbals;
 	}
-	queue_start_poll[0] = card->discipline.start_poll;
+	for (i = 0; i < card->qdio.no_in_queues; ++i)
+		queue_start_poll[i] = card->discipline.start_poll;
+
+	qeth_qdio_establish_cq(card, in_sbal_ptrs, queue_start_poll);
 
 	out_sbal_ptrs =
 		kmalloc(card->qdio.no_out_queues * QDIO_MAX_BUFFERS_PER_Q *
@@ -3981,7 +4366,7 @@ static int qeth_qdio_establish(struct qe
 	for (i = 0, k = 0; i < card->qdio.no_out_queues; ++i)
 		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j, ++k) {
 			out_sbal_ptrs[k] = (struct qdio_buffer *)virt_to_phys(
-				card->qdio.out_qs[i]->bufs[j].buffer);
+				card->qdio.out_qs[i]->bufs[j]->buffer);
 		}
 
 	memset(&init_data, 0, sizeof(struct qdio_initialize));
@@ -3989,7 +4374,7 @@ static int qeth_qdio_establish(struct qe
 	init_data.q_format               = qeth_get_qdio_q_format(card);
 	init_data.qib_param_field_format = 0;
 	init_data.qib_param_field        = qib_param_field;
-	init_data.no_input_qs            = 1;
+	init_data.no_input_qs            = card->qdio.no_in_queues;
 	init_data.no_output_qs           = card->qdio.no_out_queues;
 	init_data.input_handler          = card->discipline.input_handler;
 	init_data.output_handler         = card->discipline.output_handler;
@@ -3997,6 +4382,7 @@ static int qeth_qdio_establish(struct qe
 	init_data.int_parm               = (unsigned long) card;
 	init_data.input_sbal_addr_array  = (void **) in_sbal_ptrs;
 	init_data.output_sbal_addr_array = (void **) out_sbal_ptrs;
+	init_data.output_sbal_state_array = card->qdio.out_bufstates;
 	init_data.scan_threshold =
 		(card->info.type == QETH_CARD_TYPE_IQD) ? 8 : 32;
 
@@ -4013,6 +4399,17 @@ static int qeth_qdio_establish(struct qe
 			qdio_free(CARD_DDEV(card));
 		}
 	}
+
+	switch (card->options.cq) {
+	case QETH_CQ_ENABLED:
+		dev_info(&card->gdev->dev, "Completion Queue support enabled");
+		break;
+	case QETH_CQ_DISABLED:
+		dev_info(&card->gdev->dev, "Completion Queue support disabled");
+		break;
+	default:
+		break;
+	}
 out:
 	kfree(out_sbal_ptrs);
 out_free_queue_start_poll:
@@ -4191,6 +4588,8 @@ static inline int qeth_create_skb_frag(s
 		(*pskb)->truesize += data_len;
 		(*pfrag)++;
 	}
+
+
 	return 0;
 }
 
@@ -4664,6 +5063,8 @@ static struct {
 	{"tx do_QDIO count"},
 	{"tx csum"},
 	{"tx lin"},
+	{"cq handler count"},
+	{"cq handler time"}
 };
 
 int qeth_core_get_sset_count(struct net_device *dev, int stringset)
@@ -4722,6 +5123,8 @@ void qeth_core_get_ethtool_stats(struct
 	data[32] = card->perf_stats.outbound_do_qdio_cnt;
 	data[33] = card->perf_stats.tx_csum;
 	data[34] = card->perf_stats.tx_lin;
+	data[35] = card->perf_stats.cq_cnt;
+	data[36] = card->perf_stats.cq_time;
 }
 EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats);
 
@@ -4880,7 +5283,16 @@ static int __init qeth_core_init(void)
 		goto slab_err;
 	}
 
+	qeth_qdio_outbuf_cache = kmem_cache_create("qeth_buf",
+			sizeof(struct qeth_qdio_out_buffer), 0, 0, NULL);
+	if (!qeth_qdio_outbuf_cache) {
+		rc = -ENOMEM;
+		goto cqslab_err;
+	}
+
 	return 0;
+cqslab_err:
+	kmem_cache_destroy(qeth_core_header_cache);
 slab_err:
 	root_device_unregister(qeth_core_root_dev);
 register_err:
@@ -4905,6 +5317,7 @@ static void __exit qeth_core_exit(void)
 			   &driver_attr_group);
 	ccwgroup_driver_unregister(&qeth_core_ccwgroup_driver);
 	ccw_driver_unregister(&qeth_ccw_driver);
+	kmem_cache_destroy(qeth_qdio_outbuf_cache);
 	kmem_cache_destroy(qeth_core_header_cache);
 	qeth_unregister_dbf_views();
 	pr_info("core functions removed\n");


^ permalink raw reply

* [patch 04/11] [PATCH] af_iucv: cleanup - use iucv_sk(sk) early
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Ursula Braun
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-af_iucv-cleanup-use-iucv_sk-early.patch --]
[-- Type: text/plain, Size: 3567 bytes --]

From: Ursula Braun <ursula.braun@de.ibm.com>

Code cleanup making make use of local variable for struct iucv_sock.

Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 net/iucv/af_iucv.c |   44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -446,23 +446,25 @@ static void iucv_sock_init(struct sock *
 static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
 {
 	struct sock *sk;
+	struct iucv_sock *iucv;
 
 	sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto);
 	if (!sk)
 		return NULL;
+	iucv = iucv_sk(sk);
 
 	sock_init_data(sock, sk);
-	INIT_LIST_HEAD(&iucv_sk(sk)->accept_q);
-	spin_lock_init(&iucv_sk(sk)->accept_q_lock);
-	skb_queue_head_init(&iucv_sk(sk)->send_skb_q);
-	INIT_LIST_HEAD(&iucv_sk(sk)->message_q.list);
-	spin_lock_init(&iucv_sk(sk)->message_q.lock);
-	skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
-	iucv_sk(sk)->send_tag = 0;
-	iucv_sk(sk)->flags = 0;
-	iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT;
-	iucv_sk(sk)->path = NULL;
-	memset(&iucv_sk(sk)->src_user_id , 0, 32);
+	INIT_LIST_HEAD(&iucv->accept_q);
+	spin_lock_init(&iucv->accept_q_lock);
+	skb_queue_head_init(&iucv->send_skb_q);
+	INIT_LIST_HEAD(&iucv->message_q.list);
+	spin_lock_init(&iucv->message_q.lock);
+	skb_queue_head_init(&iucv->backlog_skb_q);
+	iucv->send_tag = 0;
+	iucv->flags = 0;
+	iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
+	iucv->path = NULL;
+	memset(&iucv->src_user_id , 0, 32);
 
 	sk->sk_destruct = iucv_sock_destruct;
 	sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
@@ -669,7 +671,7 @@ static int iucv_sock_connect(struct sock
 {
 	struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
 	struct sock *sk = sock->sk;
-	struct iucv_sock *iucv;
+	struct iucv_sock *iucv = iucv_sk(sk);
 	unsigned char user_data[16];
 	int err;
 
@@ -691,14 +693,13 @@ static int iucv_sock_connect(struct sock
 	lock_sock(sk);
 
 	/* Set the destination information */
-	memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8);
-	memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8);
+	memcpy(iucv->dst_user_id, sa->siucv_user_id, 8);
+	memcpy(iucv->dst_name, sa->siucv_name, 8);
 
 	high_nmcpy(user_data, sa->siucv_name);
-	low_nmcpy(user_data, iucv_sk(sk)->src_name);
+	low_nmcpy(user_data, iucv->src_name);
 	ASCEBC(user_data, sizeof(user_data));
 
-	iucv = iucv_sk(sk);
 	/* Create path. */
 	iucv->path = iucv_path_alloc(iucv->msglimit,
 				     IUCV_IPRMDATA, GFP_KERNEL);
@@ -836,20 +837,21 @@ static int iucv_sock_getname(struct sock
 {
 	struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
 	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
 
 	addr->sa_family = AF_IUCV;
 	*len = sizeof(struct sockaddr_iucv);
 
 	if (peer) {
-		memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8);
-		memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8);
+		memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
+		memcpy(siucv->siucv_name, iucv->dst_name, 8);
 	} else {
-		memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8);
-		memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8);
+		memcpy(siucv->siucv_user_id, iucv->src_user_id, 8);
+		memcpy(siucv->siucv_name, iucv->src_name, 8);
 	}
 	memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port));
 	memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
-	memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
+	memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
 
 	return 0;
 }


^ permalink raw reply

* Re: IP over 802.2 with LLC/SNAP
From: Rick Jones @ 2011-07-27 16:26 UTC (permalink / raw)
  To: Alan Cox; +Cc: Alan Ott, linux-kernel, linux-net, netdev
In-Reply-To: <20110727133209.0d9dd6c4@lxorguk.ukuu.org.uk>

On 07/27/2011 05:32 AM, Alan Cox wrote:
>> So the question is, does Linux support IP over 802.2 with LLC/SNAP? Is
>> there a sysfs/proc entry that I have to turn on to make this work (I
>> didn't find one)? I have the LLC2 module loaded, and I believe my packet
>> to be correct, since Windows recognizes it and since Wireshark doesn't
>> give any red flags on it. I've been unable to find anything about this
>> kind of thing in my searching.
>
> Linux supports LLC/SNAP and various things over it (IPX/Appletalk DDP
> etc) but not IP over it, as it's one of those standards bodies driven
> bogosities which nobody ever actually deployed.

Well...  Hewlett-Packard deployed it in the 80's and 90's in MPE and 
HP-UX, HP-UX because until the mid-ish 1990's MPE only spoke 802.2 not 
"Ethernet."  By the late 1990's I think it was gone from HP-UX.

rick jones

^ permalink raw reply

* [PATCH 1/1] tc: filter: fix default 'protocol all' on little-endian platforms
From: Florian Westphal @ 2011-07-27 16:47 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

when specifiying filters without 'protocol' keyword, tc will
default to 'protocol all'.

Unfortunately, this missed a byte-ordering conversion.
---
 tc/tc_filter.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/tc/tc_filter.c b/tc/tc_filter.c
index 919c57c..4e55812 100644
--- a/tc/tc_filter.c
+++ b/tc/tc_filter.c
@@ -73,7 +73,7 @@ int tc_filter_modify(int cmd, unsigned flags, int argc, char **argv)
 	req.t.tcm_family = AF_UNSPEC;
 
 	if (cmd == RTM_NEWTFILTER && flags & NLM_F_CREATE)
-		protocol = ETH_P_ALL;
+		protocol = htons(ETH_P_ALL);
 
 	while (argc > 0) {
 		if (strcmp(*argv, "dev") == 0) {
-- 
1.7.3.4


^ permalink raw reply related

* Re: [PATCH] vfs: avoid taking locks if inode not in lists
From: Andi Kleen @ 2011-07-27 17:12 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Christoph Hellwig, Tim Chen, Al Viro, David Miller, Andi Kleen,
	Matthew Wilcox, Anton Blanchard, npiggin, linux-kernel,
	linux-fsdevel, netdev
In-Reply-To: <1311780065.2356.18.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

> Before even taking locks, we can perform an unlocked test to check if
> inode can possibly be in the lists.
> 
> On a 2x4x2 machine, a close(socket()) pair can be 200% faster with these
> changes.

Great result! Seems simple and obvious to me.

-Andi

^ permalink raw reply

* pull request: wireless-next-2.6 2011-07-27
From: John W. Linville @ 2011-07-27 18:49 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

Dave,

Here is a handful of fixes intended for 3.1.  This includes a
user-visible typo fix, a fix for a user after free in the new pn533
NFC driver, a cfg80211 fix for a possible NULL pointer dereference,
a fix for an invalid memory access in b43, and another b43 fix for
a memory corruption problem.

On top of that b43 memory corruption fix, there is a patch to remove
BROKEN from the B43_BCMA Kconfig entry, which is key to enabling
support for some of the more modern Broadcom wireless hardware.
I'm sure the Rafał (and a number of others) would love to see that
merged while the 3.1 merge window is still open as well.

Please let me know if there are problems...

Thanks!

John

---

The following changes since commit 41bf37117b47fc5ce2aae91f6a108e7e42e0b046:

  Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next-2.6 into for-davem (2011-07-22 17:51:16 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next-2.6.git for-davem

Dan Carpenter (1):
      NFC: pn533: use after free in pn533_disconnect()

John W. Linville (1):
      Merge branch 'master' of git://git.kernel.org/.../linville/wireless-next-2.6 into for-davem

Mihai Moldovan (1):
      wireless: fix a typo in ignore_reg_update

Pavel Roskin (1):
      b43: fix invalid memory access in b43_ssb_remove()

Rafał Miłecki (2):
      b43: bus: fix memory corruption when setting driver's data
      b43: bcma: drop BROKEN

Sven Neumann (1):
      cfg80211: really ignore the regulatory request

 drivers/net/wireless/b43/Kconfig |    2 +-
 drivers/net/wireless/b43/bus.c   |    2 ++
 drivers/net/wireless/b43/main.c  |    5 +++--
 drivers/nfc/pn533.c              |    2 +-
 net/wireless/reg.c               |    7 ++++---
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig
index d2293dc..3cab843 100644
--- a/drivers/net/wireless/b43/Kconfig
+++ b/drivers/net/wireless/b43/Kconfig
@@ -28,7 +28,7 @@ config B43
 
 config B43_BCMA
 	bool "Support for BCMA bus"
-	depends on B43 && BCMA && BROKEN
+	depends on B43 && BCMA
 	default y
 
 config B43_SSB
diff --git a/drivers/net/wireless/b43/bus.c b/drivers/net/wireless/b43/bus.c
index 64c3f65..05f6c7b 100644
--- a/drivers/net/wireless/b43/bus.c
+++ b/drivers/net/wireless/b43/bus.c
@@ -244,10 +244,12 @@ void b43_bus_set_wldev(struct b43_bus_dev *dev, void *wldev)
 #ifdef CONFIG_B43_BCMA
 	case B43_BUS_BCMA:
 		bcma_set_drvdata(dev->bdev, wldev);
+		break;
 #endif
 #ifdef CONFIG_B43_SSB
 	case B43_BUS_SSB:
 		ssb_set_drvdata(dev->sdev, wldev);
+		break;
 #endif
 	}
 }
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 73fbf03..a92cde8 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -5350,6 +5350,7 @@ static void b43_ssb_remove(struct ssb_device *sdev)
 {
 	struct b43_wl *wl = ssb_get_devtypedata(sdev);
 	struct b43_wldev *wldev = ssb_get_drvdata(sdev);
+	struct b43_bus_dev *dev = wldev->dev;
 
 	/* We must cancel any work here before unregistering from ieee80211,
 	 * as the ieee80211 unreg will destroy the workqueue. */
@@ -5365,14 +5366,14 @@ static void b43_ssb_remove(struct ssb_device *sdev)
 		ieee80211_unregister_hw(wl->hw);
 	}
 
-	b43_one_core_detach(wldev->dev);
+	b43_one_core_detach(dev);
 
 	if (list_empty(&wl->devlist)) {
 		b43_leds_unregister(wl);
 		/* Last core on the chip unregistered.
 		 * We can destroy common struct b43_wl.
 		 */
-		b43_wireless_exit(wldev->dev, wl);
+		b43_wireless_exit(dev, wl);
 	}
 }
 
diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c
index 0372315..c77e054 100644
--- a/drivers/nfc/pn533.c
+++ b/drivers/nfc/pn533.c
@@ -1596,7 +1596,7 @@ static void pn533_disconnect(struct usb_interface *interface)
 	usb_free_urb(dev->out_urb);
 	kfree(dev);
 
-	nfc_dev_info(&dev->interface->dev, "NXP PN533 NFC device disconnected");
+	nfc_dev_info(&interface->dev, "NXP PN533 NFC device disconnected");
 }
 
 static struct usb_driver pn533_driver = {
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 1ad0f39..02751db 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -903,7 +903,7 @@ static bool ignore_reg_update(struct wiphy *wiphy,
 	    initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
 	    !is_world_regdom(last_request->alpha2)) {
 		REG_DBG_PRINT("Ignoring regulatory request %s "
-			      "since the driver requires its own regulaotry "
+			      "since the driver requires its own regulatory "
 			      "domain to be set first",
 			      reg_initiator_name(initiator));
 		return true;
@@ -1125,12 +1125,13 @@ void wiphy_update_regulatory(struct wiphy *wiphy,
 	enum ieee80211_band band;
 
 	if (ignore_reg_update(wiphy, initiator))
-		goto out;
+		return;
+
 	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 		if (wiphy->bands[band])
 			handle_band(wiphy, band, initiator);
 	}
-out:
+
 	reg_process_beacons(wiphy);
 	reg_process_ht_flags(wiphy);
 	if (wiphy->reg_notifier)
-- 
John W. Linville		Someday the world will need a hero, and you
linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org			might be all we have.  Be ready.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: strange behaviour of MC7700 with sierra_net
From: Dan Williams @ 2011-07-27 19:40 UTC (permalink / raw)
  To: Phil Sutter
  Cc: Elina Pasheva, dbrownell-Rn4VEauK+AKRv+LV9MX5uipxlwaOVQ5f,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, Rory Filer
In-Reply-To: <20110727141246.GC29616-2COwY06ShZsYt6otZODPyA@public.gmane.org>

On Wed, 2011-07-27 at 16:12 +0200, Phil Sutter wrote:
> Hello everyone,
> 
> I am testing the above module on linux-2.6.39.2 which should contain the
> latest changes to at least sierra_net.c. Although I am able to bring the
> module up and then can get traffic through it, initialisation seems to
> be a little picky.

I wonder if the firmware changed and Sierra hasn't updated the driver
yet.  I've got a usb306 which also uses sierra_net which doesn't have
this problem.  It looks like the driver tries to sync with the firmware
right after binding to the net interface, so that means at least a
couple of SYNC requests have already happened by the time you start
getting errors.  My first thought here is simply that the firmware on
the MC7700 doesn't work like the rest of the devices that sierra_net
expects, and for that you'd have to get Sierra to weigh in on the
required changes :(

Since the MC7700 is an LTE module it's 100% likely the firmware is
different, or at least significantly rebased, from the existing Sierra
HSPA/EVDO parts that use sierra_net and thus I wouldn't necessarily
expect it to work out of the box.

Dan

> After connecting the module via USB, I get the following final
> initialisation message:
> | Jul 27 14:01:15 (none) user.info kernel: [166371.982356] sierra_net 1-1:1.7: wwan0: register 'sierra_net' at usb-0000:00:08.2-1, Sierra Wireless USB-to-WWAN Modem, 4a:82:22:b9:05:07
> 
> Doing nothing (successfully), the driver starts printing errors after a
> while:
> | Jul 27 14:02:15 (none) user.err kernel: [166432.201461] sierra_net 1-1:1.7: wwan0: Submit SYNC failed -32
> | Jul 27 14:02:15 (none) user.err kernel: [166432.201609] sierra_net 1-1:1.7: wwan0: Send SYNC failed, status -32
> | Jul 27 14:02:15 (none) user.err kernel: [166432.205446] sierra_net 1-1:1.7: wwan0: Submit SYNC failed -32
> | Jul 27 14:02:15 (none) user.err kernel: [166432.205590] sierra_net 1-1:1.7: wwan0: Send SYNC failed, status -32
> 
> The above messages are the first ones to appear (so there's a delay of
> 60 seconds in between), and they repeat each 2 seconds from then on.
> Depending on how I continue at that point, results vary:
> 
> a) 'ip link set wwan0 up': setting the interface up stops the above error
>    messages from being printed. This worked every time I tried.
> 
> b) Sending 'ATZ' on the control-tty: this makes the error-messages
>    disappear for about 6 seconds, so two iterations of the sync-timer
>    seem to succeed. When I then try to continue initialisation, I usually
>    get to 'AT+C' (for AT+CPIN='1234'), then the control-tty dies (neither
>    echoing of typed characters, nor feedback from the modem printed).
> 
> Trying a) from the state after b) indeed makes the error-messages go
> away, but the tty stays dead until I power-cycle the module. Needless to
> say, without the control-tty the module is completely useless.
> 
> My first thought was that the driver shouldn't try to SYNC while the
> interface being down, but apparently sierra_net_send_sync() doesn't get
> called anymore after setting the interface up.
> 
> So in order to prevent the module from dieing, I need to up the
> interface before initialisation via AT-interface.
> 
> Another interesting aspect: the above error stays gone after the
> interface has been upped once, even if it's brought down right
> afterwards without doing anything else. OK, not completely - there needs
> to be a little delay in which the interface stays up, but a tenth of a
> second was enough.
> 
> What else can I do to track this problem down further? What additional
> information do you need from me? Any advice is highly appreciated, of
> course!
> 
> Greetings, Phil
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: IPv6: autoconfiguration and suspend/resume or link down/up
From: Dan Williams @ 2011-07-27 19:48 UTC (permalink / raw)
  To: David Miller; +Cc: jbohac, netdev, herbert, shemminger
In-Reply-To: <20110722.010628.1678943945721626312.davem@davemloft.net>

On Fri, 2011-07-22 at 01:06 -0700, David Miller wrote:
> From: Jiri Bohac <jbohac@suse.cz>
> Date: Tue, 19 Jul 2011 20:02:53 +0200
> 
> > When the cable is unplugged and plugged in again, we already get
> > notified through linkwatch -> netdev_state_change ->
> >   -> call_netdevice_notifiers(NETDEV_CHANGE, ...)
> > However, if the device has already been autoconfigured,
> > addrconf_notify() only handles this event by printing a
> > message.
> > 
> > So my idea was to:
> > - handle link up/down in addrconf_notify() similarly to
> >   NETDEV_UP/NETDEV_DOWN
> > 
> > - on suspend, faking a link down event; on resume, faking a link up event
> >   (or better, having a special event type for suspend/resume)
> > 
> > This would cause autoconfiguration to be restarted on resume as
> > well as cable plug/unplug, solving both the above problems.
> > 
> > Or do we want to completely rely on userspace tools
> > (networkmanager/ifplug) and expect them to do NETDEV_DOWN on
> > unplug/suspend and NETDEV_UP on plug/resume?
> > 
> > Any thoughts?
> 
> This is an oft-reocurring discussion, what to do on link up/down
> events wrt. all sorts of autoconfiguration.
> 
> My gut instinct is that on any link state change (physical link down,
> suspend) we should be prepared to renegotiate everything and anything
> since as you state we could be on a different physical network.
> 
> Suspend is even more important because while we were suspended we
> could be on the same network but the routers present and available
> might have changed completely.
> 
> In userspace I've noticed that we've grown an ecosystem of stupid
> tools and facilities, none (or very few) of which monitor link status
> in order to do handle things intelligently.  DHCP servers are a great
> example.  DHCP servers spit out broadcast discover packets before we
> even have a link up.
> 
> Filling this void is NetworkManager, which does listen on a netlink
> socket for device state changes, hotplug, etc.  And in response it
> explicitly tells various facilities to reprobe the network.
> 
> This is why I'm reluctant to give NetworkManager a hard time, because
> whilst it's a huge beast, it is at least trying to do the right thing.
> :-)

Oh, it's not that huge :)  What's huge is the networking problem space,
and thus when you build something that tries to be aware of much of
what's going on (which in the modern world you do really need) it's
going to need to talk to all sorts of different subsystems...  Such is
life :(  It's sufficiently module though that if you have no need to
modems, or PPP, or WiFi, or 802.1x, or WiMAX, you don't need to run
those parts.  Of course now we're adding bridging, VLAN, bonding, etc
support, so the amoeba gets larger.  If only people stopped adding
features to the kernel networking stack :)

Dan


^ permalink raw reply

* Re: [PATCH net-next-2.6 v2] bonding: reduce noise during init
From: Andy Gospodarek @ 2011-07-27 20:09 UTC (permalink / raw)
  To: Joe Perches; +Cc: Jay Vosburgh, Andy Gospodarek, netdev
In-Reply-To: <1311727227.15386.40.camel@Joe-Laptop>

On Tue, Jul 26, 2011 at 05:40:27PM -0700, Joe Perches wrote:
> On Tue, 2011-07-26 at 17:37 -0700, Jay Vosburgh wrote:
> > Joe Perches <joe@perches.com> wrote:
> > >I'd prefer you don't separate the format string
> > >into multiple pieces.
> > Why not?  To me, it looks easier to read split into sections
> > that don't wrap lines.
> 
> Harder to grep for a dmesg and the
> defect rate of these split formats is
> typically higher than single strings
> because of bad spacing between string
> segments.
> 

I noticed that you took some time back in late 2009 to 'consolidate' the
split format-strings present in the bonding driver at the time and I've
decided I'm fine to leave them the way they are.  The main point of my
patch was to change the output and I would like to get that included.
Here is my updated patch...


Subject: [PATCH net-next-2.6 v2] bonding: reduce noise during init

Many are using sysfs to configure bonding rather than module options, so
there is no need for bonding to throw this warning in normal cases.

Keep the message around when debugging is enabled as it might be useful
for someone desperate enough to enable debugging, but eliminate it
otherwise.

Signed-off-by: Andy Gospodarek <andy@greyhouse.net>

---
 drivers/net/bonding/bond_main.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 61265f7..b37c602 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4745,7 +4745,7 @@ static int bond_check_params(struct bond_params *params)
 		/* miimon and arp_interval not set, we need one so things
 		 * work as expected, see bonding.txt for details
 		 */
-		pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
+		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
 	}
 
 	if (primary && !USES_PRIMARY(bond_mode)) {
-- 
1.7.4.4




^ permalink raw reply related

* Re: [PATCH] vfs: avoid taking locks if inode not in lists
From: Christoph Hellwig @ 2011-07-27 20:44 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Christoph Hellwig, Tim Chen, Al Viro, David Miller, Andi Kleen,
	Matthew Wilcox, Anton Blanchard, npiggin, linux-kernel,
	linux-fsdevel, netdev
In-Reply-To: <1311780065.2356.18.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

On Wed, Jul 27, 2011 at 05:21:05PM +0200, Eric Dumazet wrote:
> If I am not mistaken, we can add unlocked checks on the three hot spots.
> 
> After following patch, a close(socket(PF_INET, SOCK_DGRAM, 0)) pair on
> my dev machine takes ~3us instead of ~9us.
> 
> Maybe its better to split it in three patches, just let me know.

I think three patches would be a lot cleaner.

As for safety of the unlocked checks:

 - inode are either hashed when created or never, so that one looks
   fine.
 - same for the sb list.
 - the writeback list is a bit more dynamic as we move things around
   quite a bit.  But in additon to the inode_wb_list_del call from
   evict() it only ever gets remove in writeback_single_inode, which
   for a freeing inode can only be called from the callers of evict().

Btw, I wonder if you should micro-optimize things a bit further by
moving the unhashed checks from the deletion functions into the callers
and thus save a function call for each of them.


^ permalink raw reply

* Re: [PATCH] vfs: avoid taking locks if inode not in lists
From: Andi Kleen @ 2011-07-27 20:59 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Eric Dumazet, Tim Chen, Al Viro, David Miller, Andi Kleen,
	Matthew Wilcox, Anton Blanchard, npiggin, linux-kernel,
	linux-fsdevel, netdev
In-Reply-To: <20110727204415.GA13308@infradead.org>

> Btw, I wonder if you should micro-optimize things a bit further by
> moving the unhashed checks from the deletion functions into the callers
> and thus save a function call for each of them.

If the caller is in the same file modern gcc is able to do that automatically
if you're lucky enough ("partial inlining")

I would not uglify the code for it.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply

* Re: [PATCH] vfs: avoid taking locks if inode not in lists
From: Christoph Hellwig @ 2011-07-27 21:01 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Christoph Hellwig, Eric Dumazet, Tim Chen, Al Viro, David Miller,
	Matthew Wilcox, Anton Blanchard, npiggin, linux-kernel,
	linux-fsdevel, netdev
In-Reply-To: <20110727205957.GC8006@one.firstfloor.org>

On Wed, Jul 27, 2011 at 10:59:57PM +0200, Andi Kleen wrote:
> > Btw, I wonder if you should micro-optimize things a bit further by
> > moving the unhashed checks from the deletion functions into the callers
> > and thus save a function call for each of them.
> 
> If the caller is in the same file modern gcc is able to do that automatically
> if you're lucky enough ("partial inlining")
> 
> I would not uglify the code for it.

Depending on how you look at it the code might actually be a tad
cleaner.  One of called functions is outside of inode.c.


^ permalink raw reply

* [RFC net-next PATCH 0/4] Add new settings for ethtool
From: Greg Rose @ 2011-07-27 22:17 UTC (permalink / raw)
  To: netdev; +Cc: davem, bhutchings, jeffrey.t.kirsher

This series of patches implements several changes to the way SR-IOV
can be configured to allow for per port assignment of the number of
VFs.

In addition, patches 1/4 and 2/4 of this patch series adds a new flag
bit to the PCI device structure dev_flags that is used by the kvm
module to indicate when a PCI device is assigned to a guest virtual
machine (VM) and changes to the ixgbe driver to make use of this new
bit.  The advantage these first two patches provide is to prevent
destruction of virtual functions (VFs) by the physical function
(PF) driver when it is removed if the PF driver finds that any VFs
are still assigned to guest VMs.  This in turn prevents panics that
will result when the VF device disappears from the guest VM without
notification.

Patches 3/4 and 4/4 of this patch series implement new settings for
Ethtool that allow for reconfiguration of the number of VFs per PF
without resorting to use of the module parameter which is only capable
of setting a single number of VFs per PF, to set the number of VM
queues for devices that support it and an on/off switch for the
anti-spoofing feature.  A follow on patch for Ethtool that implements
these settings will be sent separately.

Patches 1/4 and 2/4 of this series could also be considered for
submission to the current net tree and also the stable tree since
they prevent catastrophic results when the PF driver is removed
while VFs are assigned to guest VMs.

---

Greg Rose (4):
      ixgbe: Add support for new ethtool settings
      ethtool: Add new set commands
      ixgbe: Reconfigure SR-IOV Init
      pci: Add flag indicating device has been assigned by KVM


 drivers/net/ixgbe/ixgbe.h         |    2 
 drivers/net/ixgbe/ixgbe_ethtool.c |   96 ++++++++++++++++++
 drivers/net/ixgbe/ixgbe_main.c    |  109 ++------------------
 drivers/net/ixgbe/ixgbe_sriov.c   |  197 +++++++++++++++++++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_sriov.h   |    6 +
 drivers/net/ixgbe/ixgbe_type.h    |    4 +
 include/linux/ethtool.h           |   11 ++
 include/linux/pci.h               |    2 
 virt/kvm/assigned-dev.c           |    2 
 virt/kvm/iommu.c                  |    4 +
 10 files changed, 333 insertions(+), 100 deletions(-)

-- 
Signed-off-by: Greg Rose <gregory.v.rose@intel.com>

^ permalink raw reply

* [RFC net-next PATCH 1/4] pci: Add flag indicating device has been assigned by KVM
From: Greg Rose @ 2011-07-27 22:17 UTC (permalink / raw)
  To: netdev; +Cc: davem, bhutchings, jeffrey.t.kirsher
In-Reply-To: <20110727221406.8435.44324.stgit@gitlad.jf.intel.com>

Device drivers that create and destroy SR-IOV virtual functions via
calls to pci_enable_sriov() and pci_disable_sriov can cause catastrophic
failures if they attempt to destroy VFs while they are assigned to
guest virtual machines.  By adding a flag for use by the KVM module
to indicate that a device is assigned a device driver can check that
flag and avoid destroying VFs while they are assigned and avoid system
failures.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
---

 include/linux/pci.h     |    2 ++
 virt/kvm/assigned-dev.c |    2 ++
 virt/kvm/iommu.c        |    4 ++++
 3 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2d29218..a297ca2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -174,6 +174,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1,
 	/* Device configuration is irrevocably lost if disabled into D3 */
 	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
+	/* Provide indication device is assigned by KVM */
+	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4,
 };
 
 enum pci_irq_reroute_variant {
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 6cc4b97..f401de1 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -205,6 +205,8 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 	else
 		pci_restore_state(assigned_dev->dev);
 
+	assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+
 	pci_release_regions(assigned_dev->dev);
 	pci_disable_device(assigned_dev->dev);
 	pci_dev_put(assigned_dev->dev);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 62a9caf..cffc530 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -181,6 +181,8 @@ int kvm_assign_device(struct kvm *kvm,
 			goto out_unmap;
 	}
 
+	pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
+
 	printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
 		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,
@@ -209,6 +211,8 @@ int kvm_deassign_device(struct kvm *kvm,
 
 	iommu_detach_device(domain, &pdev->dev);
 
+	pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+
 	printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
 		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox