Netdev List
 help / color / mirror / Atom feed
* [PATCH v4 23/27] HFI: Define packet header formats and window register offsets
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 include/linux/hfi/hfidd_pkt_formats.h |  338 +++++++++++++++++++++++++++++++++
 include/linux/hfi/hfidd_regs.h        |  212 +++++++++++++++++++++
 2 files changed, 550 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/hfi/hfidd_pkt_formats.h
 create mode 100644 include/linux/hfi/hfidd_regs.h

diff --git a/include/linux/hfi/hfidd_pkt_formats.h b/include/linux/hfi/hfidd_pkt_formats.h
new file mode 100644
index 0000000..ee8d385
--- /dev/null
+++ b/include/linux/hfi/hfidd_pkt_formats.h
@@ -0,0 +1,338 @@
+/*
+ * hfidd_pkt_formats.h
+ *
+ * HFI device driver for IBM System p
+ *
+ *  Authors:
+ *      Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *      William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *      Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *      Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *      Jian Xiao <jian@linux.vnet.ibm.com>
+ *      Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *      Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _HFI_PKT_FORMATS_H
+#define _HFI_PKT_FORMATS_H
+
+#define HFI_HW_DIRECT_ROUTE		0
+#define HFI_SW_INDIRECT_ROUTE		1
+#define HW_DIRECT_STRIPING_ROUTE	2
+#define HW_INDIRECT_STRIPING_ROUTE	3
+
+/* HFI packet header format */
+
+struct immediate_send_hdr {
+	unsigned int	reserved1:8;
+	unsigned int	cop_type:8;	/* HFI coprocessor type is 0x3A */
+	unsigned int	reserved2:7;
+	unsigned int	src_win:9;	/* Hi bit is hfi# */
+} __packed;
+
+struct packet_type_hdr {
+	unsigned int	header_type:7;	/* See hfi_packet_type for types */
+	unsigned int	send_origin:1;	/*  0: from Send/RDMA FIFO */
+					/*  1: from Immediate Send */
+} __packed;
+
+struct base_hdr {
+	unsigned int	reserve1:1;
+	unsigned int	dst_isr:14;	/* destination ISR */
+	unsigned int	dst_win:9;	/* destination window.
+					   Hi bit is hfi num */
+	unsigned int	reserve2:1;
+
+	unsigned int	src_isr:14;	/* src ISR, Set by hardware */
+	unsigned int	src_win:9;	/* src window, Set by hardware.
+					   Hi bit is hfi num */
+	unsigned int	src_cau:1;
+	unsigned int	dst_cau:1;
+	unsigned int	reserve3:2;
+
+	unsigned int	pkt_len:4;	/* In cache lines. 0 means 16 */
+	unsigned int	reserve4:8;
+	unsigned int	trc_enable:1;	/* Request ISR trace info
+					   send/recv, IP packets only */
+	unsigned int	ctr_incr:1;	/* Request send and receive
+					   counter increments */
+	unsigned int	sii:1;		/* Source Side Immediate Int */
+	unsigned int	dii:1;		/* Dest Side Immediate Int */
+	unsigned int	reserve5:3;
+	unsigned int	pkt_valid:1;	/* Set by hardware
+					   Toggles when rFIFO wraps */
+	unsigned int	status:5;	/* Set by hardware completion status */
+	unsigned int	route_control:2;/* ISR use only	*/
+	unsigned int	ind_node:9;	/* ISR use only	*/
+} __packed;
+
+struct hfi_rdma_extended_hdr {			/* 48B */
+	unsigned long long	source_ea;	/* Full RDMA only, byte
+						   boundary */
+	unsigned long long	dest_ea;	/* byte boundary, independent of
+						   source data boundary	*/
+	unsigned int	pkt_len:4;		/* In cache lines, with
+						   exceptions */
+	unsigned int	reserved1:3;
+	unsigned int	msg_len:25;		/* In bytes.
+						   FullRDMA: Max 32MB
+						   (encoded as 0)
+						   HalfRDMA: Max 2kB
+						   SmallRDMA: Max 16B */
+	unsigned int	reserved2:5;
+	unsigned int	rcxt_offset:20;		/* into window's rcxt area */
+	unsigned int	reserved3:1;
+	unsigned int	breakup:6;		/* Message Breakup count */
+	unsigned int	epoch:32;
+	unsigned int	seq_num:14;
+	unsigned int	pkt_cnt:18;		/* FullRDMA only */
+	unsigned int	reserved4:3;
+	unsigned int	rcxt_index:5;		/* 1-31 allowed */
+	unsigned int	read_indicator:1;	/* 1 for FULL RDMA reads */
+	unsigned int	src_completion:1;
+	unsigned int	src_completion_notify:1;
+	unsigned int	dst_completion_notify:1;
+	unsigned int	status:5;		/* Notification packet only. */
+	unsigned int	reserved6:3;
+	unsigned int	fetch:1;		/* SmallRDMA only. */
+	unsigned int	opcode:3;
+	unsigned int	reserved7:8;
+	unsigned int	reserved8:32;
+	unsigned long long	cookie;		/* Protocol cookie */
+} __packed;
+
+struct gups_rdma_element {			/* 32B. Used without Base
+						   header */
+	unsigned int	job_id:32;
+	unsigned int	header_type:7;		/* Use GUPS types only */
+	unsigned int	reserved1:2;
+	unsigned int	dst_isr:14;		/* destination ISR */
+	unsigned int	dst_win:9;		/* destination window.
+						   Hi bit is hfi# */
+	unsigned int	reserved2:15;
+	unsigned int	ctr_incr:1;		/* Request send and receive
+						   counter increments */
+	unsigned int	isr_route_hdr:16;	/* ISR use only	*/
+	unsigned int	reserved3:32;
+
+	union {
+		unsigned long long	addr;
+		struct {
+			unsigned int	dest_ea_hi:32;
+			unsigned int	dest_ea_lo:29;	/* 8-byte align */
+			unsigned int	opcode:3;	/* Use HFI_OPCODE* */
+		} bits;
+	} dest_ea;
+
+	unsigned long long     payload;
+} __packed;
+
+struct hfi_cau_extended_hdr {		/* 32B */
+	unsigned int		group_id:27;
+	unsigned int		dst_sub_id:9;
+	unsigned int		src_sub_id:9;
+	unsigned int		reserved1:3;
+	unsigned int		op_attr_unsigned:1;	/* 0->Signed */
+	unsigned int		op_attr_64bit:1;	/* 0->32 bit */
+	unsigned int		op_attr_float:1;	/* 0->Fixed  */
+	unsigned int		function:5;
+	unsigned int		payload_cnt:8;		/* bitmask */
+	unsigned int		seq_num:32;
+	unsigned int		reserved2:6;
+	unsigned int		entry:1;		/* 0->A, 1->B */
+	unsigned int		reserved3:25;
+	unsigned long long	cookie_left;		/* Protocol Cookie */
+	unsigned long long	cookie_right;
+} __packed;
+
+struct hfi_hdr {
+	union {
+		struct immediate_send_hdr immediate_hdr;
+		unsigned int		job_id;
+	} id;
+
+	struct packet_type_hdr		type;
+	struct base_hdr			base_hdr;
+} __packed;
+
+struct hfi_rdma_hdr {
+	struct hfi_hdr		hfi_hdr;
+	struct hfi_rdma_extended_hdr rdma_ext;
+	/*
+	 * Pad enough for the payload to be aligned with the dest address.
+	 * SmallRDMA: no padding. FullRDMA: packets 2-N pad 64 bytes.
+	 */
+} __packed;
+
+
+struct hfi_rdma_pkt {
+	struct hfi_hdr		hfi_hdr;
+	struct hfi_rdma_extended_hdr rdma_ext;
+	/*
+	 * Pad enough for the payload to be aligned with the dest address.
+	 * SmallRDMA: no padding. FullRDMA: packets 2-N pad 64 bytes.
+	 */
+	char			padding_slash_payload[1984];
+} __packed;
+
+
+#define CACHE_LINE_SIZE 0x80
+#define CACHE_LINE_SHIFT 0x7
+/*
+ * Encode/Decode macros/inlines for bit fields
+ */
+static inline unsigned int hfi_bytes_to_cacheline(unsigned int bytes)
+{
+	return (bytes + (CACHE_LINE_SIZE - 1)) >> CACHE_LINE_SHIFT;
+}
+
+static inline unsigned int HFI_CACHELINES_TO_BYTES(unsigned int cache_lines)
+{
+	return cache_lines << CACHE_LINE_SHIFT;
+}
+
+static inline unsigned int hfi_cachelines_to_pktlen(unsigned int cachelines)
+{
+	return cachelines & 0xf;  /* Low 4 bits only (16 -> 0); no range check */
+}
+
+static inline unsigned int hfi_pktlen_to_cachelines(unsigned int pktlen)
+{
+	return pktlen ? pktlen : 16;	/* 0 pktlen == 16 cache lines */
+}
+
+struct hfi_gups_rdma_pkt {
+	struct gups_rdma_element element[4];
+} __packed;
+
+struct hfi_collective_pkt {
+	struct hfi_hdr		hfi_hdr;
+	struct hfi_cau_extended_hdr cau_ext;
+	char			payload[64];
+	char			pad[16];
+} __packed;
+
+/*
+ * Opcodes for RDMA Extended Headers and GUPS-RDMA Elements
+ */
+#define HFI_OPCODE_ADD			0x0
+#define HFI_OPCODE_AND			0x1
+#define HFI_OPCODE_OR			0x2
+#define HFI_OPCODE_XOR			0x3
+#define HFI_OPCODE_RESERVED		0x4
+#define HFI_OPCODE_CMP_SWAP_NOT_EQUAL	0x5
+#define HFI_OPCODE_CMP_SWAP_EQUAL	0x6
+#define HFI_OPCODE_SWAP			0x7
+/* Fetch can be OR'd with any of the above Opcodes (SmallRDMA only) */
+#define HFI_OPCODE_FETCH		0x8
+
+enum hfi_cau_type {
+	HFI_CAU_TYPE_FLOAT			= 0x01,
+	HFI_CAU_TYPE_64BIT			= 0x02,
+	HFI_CAU_TYPE_UNSIGNED			= 0x04
+};
+
+enum hfi_cau_function {
+	HFI_CAU_FUNCTION_NOP			= 0x00,
+	HFI_CAU_FUNCTION_SUM			= 0x01,
+	HFI_CAU_FUNCTION_MIN			= 0x02,
+	HFI_CAU_FUNCTION_MAX			= 0x03,
+	HFI_CAU_FUNCTION_AND			= 0x11,
+	HFI_CAU_FUNCTION_XOR			= 0x16,
+	HFI_CAU_FUNCTION_OR			= 0x17
+};
+
+enum hfi_pkt_status {
+	HFI_PKT_STATUS_GOOD			= 0x0,
+	HFI_PKT_STATUS_DESCRIPTOR_FAILURE	= 0x1,
+	HFI_PKT_STATUS_PACKET_KILLED		= 0x2,
+	HFI_PKT_STATUS_UNEXPECTED_LAST_FLIT	= 0x4,
+	HFI_PKT_STATUS_POWERBUS_MASTER		= 0x8,
+	HFI_PKT_STATUS_TRANSLATION_FAULT	= 0x10
+
+};
+
+enum hfi_rdma_pkt_status {
+	HFI_RDMA_PKT_STATUS_GOOD		= 0x0,
+	HFI_RDMA_PKT_STATUS_MIGRATION_CONFLICT	= 0x1,
+	HFI_RDMA_PKT_STATUS_PACKET_KILLED	= 0x4,
+	HFI_RDMA_PKT_STATUS_POWERBUS_MASTER	= 0x8,
+	HFI_RDMA_PKT_STATUS_TRANSLATION_FAULT	= 0x10
+
+};
+
+enum hfi_packet_type {
+	HFI_SEND_RECEIVE			= 0x00,	/* send FIFO -> receive
+							   FIFO */
+
+	HFI_IP_WITH_PAYLOAD			= 0x08,	/* send FIFO -> receive
+							   FIFO for IP */
+	HFI_IP_MULTICAST_WITH_PAYLOAD		= 0x09,	/* ISR use only */
+	HFI_IP_WITH_DESCRIPTORS			= 0x0C,	/* IP send/recv with
+							   indirection */
+	HFI_IP_MULTICAST_WITH_DESCRIPTORS	= 0x0D,	/* IP send/recv with
+							   indirection */
+
+	HFI_FULL_RDMA_WRITE			= 0x10,
+	HFI_FULL_RDMA_READ			= 0x11,
+	HFI_FULL_RDMA_COMPLETION		= 0x12,	/* Completion
+							   notification */
+	HFI_FULL_RDMA_PREFENCE			= 0x13,	/* Guarantee RDMA FIFO
+							   processing is
+							   complete */
+	HFI_FULL_RDMA_FENCE			= 0x14,	/* Guarantee RDMA FIFO
+							   processing is
+							   complete */
+	HFI_FULL_RMDA_WRITE_ALIAS		= 0x15,	/* HW use only */
+
+	HFI_HALF_RDMA_WRITE			= 0x18,	/* send FIFO to target
+							   EA */
+	HFI_HALF_RDMA_READ			= 0x19,	/* target EA to receive
+							   FIFO */
+	HFI_HALF_RDMA_COMPLETION		= 0x1A,	/* Completion
+							   notification */
+	HFI_HALF_RDMA_READ_ALIAS		= 0x1D,	/* HW use only */
+
+	HFI_SMALL_RDMA_WRITE			= 0x20,	/* Remote atomic
+							   operations */
+	HFI_SMALL_RDMA_COMPLETION		= 0x22,	/* Completion
+							   notification */
+	HFI_SMALL_RDMA_COMPLETION_WITH_FETCH	= 0x23,	/* Completion
+							   notification */
+
+	HFI_GUPS_RDMA_1ELEMENT			= 0x28,	/* GUPS-RDMA, 1
+							   element */
+	HFI_GUPS_RDMA_2ELEMENT			= 0x29,	/* GUPS-RDMA,
+							   2 element */
+	HFI_GUPS_RDMA_3ELEMENT			= 0x2A,	/* GUPS-RDMA, 3
+							   element */
+	HFI_GUPS_RDMA_4ELEMENT			= 0x2B,	/* GUPS-RDMA, 4
+							   element */
+
+	HFI_CAU_REDUCE				= 0x30,	/* Collective Reduce */
+	HFI_CAU_MULTICAST			= 0x31,	/* Collective
+							   Multicast */
+	HFI_CAU_ACK				= 0x32,	/* Collective Ack */
+	HFI_CAU_RETRANS_REQ			= 0x33,	/* Collective
+							   Retransmit Req */
+	HFI_LAST_HEADER
+};
+
+#endif /* _HFI_PKT_FORMATS_H */
diff --git a/include/linux/hfi/hfidd_regs.h b/include/linux/hfi/hfidd_regs.h
new file mode 100644
index 0000000..864e0e6
--- /dev/null
+++ b/include/linux/hfi/hfidd_regs.h
@@ -0,0 +1,212 @@
+/*
+ * hfidd_regs.h
+ *
+ * HFI device driver for IBM System p
+ *
+ *  Authors:
+ *      Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *      William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *      Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *      Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *      Jian Xiao <jian@linux.vnet.ibm.com>
+ *      Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *      Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef HFIDD_REGS_H
+#define HFIDD_REGS_H
+
+/* MMIO Registers */
+/* These are the registers with User Privilege Level */
+
+/* Window Control */
+#define HFI_JOB_ID_SEND		0x0000	/* 32: Send Side Job ID */
+#define HFI_JOB_ID_RECV		0x0008	/* 32: Recv Side Job ID */
+#define HFI_WINDOW_STATE	0x0010	/*  1: 0=closed, 1=active */
+#define HFI_SEND_BUSY		0x0020	/* Send Window Busy */
+#define HFI_RECV_BUSY		0x0028	/* Rcv Window Busy */
+
+/* Addr Translation */
+#define HFI_LPAR_ID		0x0040	/* 10: Partition ID */
+#define HFI_PROTECTION_DOMAIN	0x0048	/* 32: used for addr xlat */
+#define HFI_XLAT_BYPASS		0x0050	/*  1:			  */
+
+/* Imm Send */
+#define HFI_IMM_SEND_BUF_ATTRBS	0x0080	/* 40: Immed Send Buffer Attributes */
+#define HFI_IMM_BUF_INUSE	0x0090	/* Immed Send Buffers In Use */
+#define HFI_IMM_FINI_COUNT	0x00A0	/* Immed Send Buffer Finished Count */
+#define HFI_IMM_FINI_FREQUENCY	0x00A8	/* Immed Send Finished Frequency */
+
+/* Send Fifo */
+#define HFI_SFIFO_DB_REG	0x0110	/* 16: Send Fifo Doorbell */
+#define HFI_SFIFO_BASE_ADDR	0x0120	/* 64: Send Fifo Effective Addr */
+#define HFI_SFIFO_LKEY		0x0128	/* 32: Send Fifo Local Key */
+#define HFI_SFIFO_PULL_OFF	0x0140	/* 64: Send Fifo Pull Offset */
+#define HFI_SFIFO_SIZE		0x0148	/*  4: size from 4K to 8M   */
+#define HFI_SFIFO_PEND_COUNT	0x0150	/* 16: Send Fifo Pending Count */
+#define HFI_SFIFO_FINI_POLARITY	0x0158	/*  1: Send Fifo Finish Polarity Bit */
+#define HFI_SFIFO_FINI_ADDR	0x0180	/* 64: Send Fifo Finish Effective
+					   Addr */
+#define HFI_IMM_FINI_ADDR	0x0188	/* Immed Send Finished Address */
+#define HFI_SFIFO_INTR_CNTL	0x01C8	/* xx: Send Fifo Interrupt Control */
+
+/* Full-RDMA Send Fifo */
+#define HFI_FRDMA_DB_REG	0x0210	/* Full-RDMA Send Fifo Doorbell */
+#define HFI_FRDMA_BASE_ADDR	0x0220	/* Full-RDMA Send Fifo Effective
+					   Address */
+#define HFI_FRDMA_LKEY		0x0228	/* Full-RDMA Send Fifo Local Key */
+#define HFI_FRDMA_PULL_REG	0x0240	/* Full-RDMA Send Fifo Pull Offset */
+#define HFI_FRDMA_SIZE		0x0248	/* Full-RDMA Send Size */
+#define HFI_FRDMA_PEND_REG	0x0250	/* Full-RDMA Send Fifo Pending Count */
+#define HFI_FRDMA_FINI_POLARITY	0x0258	/* Full-RDMA Send Fifo Finished
+					   Polarity */
+#define HFI_FRDMA_BREAKUP_REG	0x0260	/* Full-RDMA Send Fifo Breakup Count */
+#define HFI_FRDMA_FINI_ADDR	0x0280	/* Full-RDMA Send Fifo Finished
+					   Address */
+#define HFI_FRDMA_INTR_REG	0x02C8	/* Full-RDMA Send Fifo Interrupt
+					   Control */
+#define HFI_FRDMA_FENCE_CNT	0x0578	/* Full-RDMA Send Fifo Fence Count */
+
+/* Send Interrupts */
+#define HFI_SINTR_CONTROL_REG	0x02E0	/* Send Force Interrupt Control */
+#define HFI_SINTR_STATUS_REG	0x02E8	/* Send Intr Status */
+
+/* Receive Fifo */
+#define HFI_RFIFO_INC_FSLOT_REG	0x0310	/* Receive Fifo Inc Free Slot Count */
+#define HFI_RFIFO_BASE_ADDR	0x0320	/* Receive Fifo Effective Address */
+#define HFI_RFIFO_LKEY		0x0328	/* Receive Fifo Local Key */
+#define HFI_RFIFO_PUSH_REG	0x0340	/* Receive Fifo Push Offset */
+#define HFI_RFIFO_SIZE		0x0348	/*  4: range from 4K to 8M   */
+#define HFI_RFIFO_FSLOT_CNT_REG	0x0350	/* Receive Fifo Free Slot Count */
+#define	HFI_RFIFO_FINI_POLARITY	0x0358	/* Receive Fifo Finished Polarity */
+#define HFI_RFIFO_INJ_TH_REG	0x0360	/* Receive Fifo Cache Injection
+					   Threshold */
+#define HFI_RFIFO_OUT_TH_REG	0x0368	/* Receive Fifo Out-of-Order
+					   Threshold */
+#define HFI_RFIFO_OUT_EVENT_REG	0x03C0	/* Receive Fifo Out-of-Order Event
+					   Control */
+#define HFI_RFIFO_INTR_REG	0x03C8	/* Receive Fifo Intr Control */
+
+/* Receive Interrupts */
+#define HFI_RINTR_CONTROL_REG	0x03E0	/* Receive Force Intr Control */
+#define HFI_RINTR_STATUS_REG	0x03E8	/* Receive Intr Status */
+
+/* IP 2K Free Space Descriptor Fifo */
+#define HFI_IP2K_INC_AVAIL_REG	0x0410	/* IP 2k FSD Fifo Inc Avail Slot
+					   Count */
+#define HFI_IP2K_BASE_ADDR	0x0420	/* IP 2k FSD Fifo Effective Address */
+#define HFI_IP2K_LKEY		0x0428	/* IP 2k FSD Fifo Local Key */
+#define HFI_IP2K_PULL_REG	0x0440	/* IP 2k FSD Fifo Pull Offset */
+#define HFI_IP2K_SIZE		0x0448	/* IP 2k FSD Fifo Size */
+#define HFI_IP2K_ASLOT_CNT_REG	0x0450	/* IP 2k FSD Fifo Avail Slot Count */
+
+/* RDMA Pending Fifo */
+#define HFI_RDMAP_BASE_ADDR	0x0520	/* RDMA Pending Fifo Effective Addr */
+#define HFI_RDMAP_LKEY		0x0528	/* RDMA Pending Fifo Local Key */
+#define HFI_RDMAP_PULL_REG	0x0540	/* RDMA Pending Fifo Pull Offset */
+#define HFI_RDMAP_SIZE		0x0548	/* RDMA Pending Fifo Size */
+#define HFI_RDMAP_READ_TH_REG	0x0568	/* RDMA Pending Fifo Read Request
+					   Threshold */
+#define HFI_RDMAP_PUSH_REG	0x0570	/* RDMA Pending Fifo Push Offset */
+#define HFI_RDMAP_PEND_REG	0x0550	/* RDMA Pending Fifo Pending Count */
+
+/* Send Special Fifo */
+#define HFI_SSFIFO_BASE_ADDR	0x0620	/* Send Special Fifo Effective
+					   Address */
+#define HFI_SSFIFO_LKEY		0x0628	/* Send Special Fifo Local Key */
+#define HFI_SSFIFO_PULL_REG	0x0640	/* Send Special Fifo Pull Offset */
+#define HFI_SSFIFO_SIZE		0x0648	/* Send Special Fifo Size */
+#define HFI_SSFIFO_PUSH_REG	0x0670	/* Send Special Fifo Push Offset */
+#define HFI_SSFIFO_PENG_REG	0x0650	/* Send Special Fifo Pending Count */
+
+/* IP Context */
+#define HFI_IP_MCAST_ENABLE_REG	0x0820	/* IP Send - Multi-cast Enable */
+#define HFI_IP_DISABLE_ISR_REG	0x0828	/* IP Send - Disable Src_ISR_ID
+					   Stamp */
+#define HFI_IP_PORT_VALID_REG	0x0830	/* IP Logical Port ID Valid */
+#define	HFI_IP_PORT_REG		0x0838	/* IP Logical Port ID */
+#define HFI_IP_RECV_SIZE	0x0860	/* IP Recv with Payload Max Size */
+#define HFI_IP_RCV_IMM_REG	0x0868	/* IP RCV-Immediate Max Size */
+
+/* RDMA Context */
+#define HFI_RDMA_RCXT_BASE_ADDR	0x0920	/* RDMA Rcv RCxt Effective Address */
+#define HFI_RDMA_RCXT_LKEY	0x0928	/* RDMA RCxt Local Key */
+#define	HFI_RDMA_RCXT_PAY_LKEY	0x0058	/* RDMA Payload Local Key
+					   (non-sequential) */
+#define HFI_RDMA_RCXT_EPOCH	0x0980	/* RCxt Update Epoch */
+#define HFI_RDMA_RCXT_CACHE	0x0990	/* RCxt Cache Entry Flush Request */
+
+/* Counters */
+#define HFI_PACKETS_SENT_REG	0x0C00	/* Packets Sent */
+#define HFI_PSENT_DROP_REG	0x0C08	/* Packets Dropped from Sending */
+#define HFI_PIND_SENT_CNT_REG	0x0C10	/* Packet-Indicated Send Count */
+#define HFI_IMM_SEND_CNT_REG	0x0C18	/* Immed Send Packet Count */
+#define HFI_PACKETS_RCV_REG	0x0C40	/* Packets Received */
+#define HFI_PRCV_DROP_REG	0x0C48	/* Packets Dropped from Receiving */
+#define HFI_PIND_RCV_CNT_REG	0x0C50	/* Packet-Indicated Receive Count */
+#define HFI_SBIT_EEC_CNT_REG	0x0C60	/* Single-Bit ECC Count */
+#define HFI_SBIT_EEC_TH_REG	0x0C68	/* Single-Bit ECC Threshold for Error */
+#define HFI_ADDR_XLAT_CNT_REG	0x0C80	/* Addr Xlat Wait Count */
+
+/* Misc Non-Window Registers */
+#define	HFI_NUM_WINDOWS_REG	0x0000	/* Number of Windows */
+#define HFI_AGGR_PSENT_CNT_REG	0x0100	/* Aggregate Packet Sent Count */
+#define HFI_AGGR_PSENT_DROP_REG	0x0108	/* Aggregate Packet Dropped from
+					   Sending Count */
+#define HFI_AGGR_PRCV_CNT_REG	0x0110	/* Aggregate Packet Receive Count */
+#define HFI_AGGR_PRCV_DROP_REG	0x0118	/* Aggregate Packet Dropped from
+					   Receive Count */
+#define HFI_AGGR_ISEND_CNT_REG	0x0120	/* Aggregate Immediate Send Packet
+					   Sent Count */
+#define HFI_AGGR_SR_CNT_REG	0x0128	/* Aggregate Send/Rcv Packet Send
+					   Count */
+#define HFI_AGGR_FRDMA_CNT_REG	0x0130	/* Aggregate Full-RDMA Packet Sent
+					   Count */
+#define HFI_AGGR_HRDMA_CNT_REG	0x0138	/* Aggregate Half-RDMA Packet Sent
+					   Count */
+#define HFI_AGGR_SRDMA_CNT_REG	0x0140	/* Aggregate Small-RDMA Packet Sent
+					   Count */
+#define HFI_AGGR_IP_CNT_REG	0x0148	/* Aggregate IP Packet Sent Count */
+#define HFI_AGGR_CAU_CNT_REG	0x0150	/* Aggregate CAU Packet Sent Count */
+#define HFI_AGGR_GUPS_CNT_REG	0x0158	/* Aggregate GUPS Packet Sent Count */
+#define HFI_AGGR_NOTIFS_CNT_REG 0x0170  /* Aggregate Notifications Packet Sent
+						Count*/
+
+#define HFI_RCXT_FLUSH_REG	0x0600	/* RCxt Cache Window Flush Request */
+#define HFI_PG_MIGR1_REG	0x0708	/* Page Migration Register 1 */
+#define HFI_PG_MIGR2_REG	0x0710	/* Page Migration Register 2 */
+#define HFI_PG_MIGR3_REG	0x0718	/* Page Migration Register 3 */
+#define HFI_PG_MIGR4_REG	0x0720	/* Page Migration Register 4 */
+#define HFI_PG_MIGR5_REG	0x0728	/* Page Migration Register 5 */
+#define HFI_PG_MIGR6_REG	0x0730	/* Page Migration Register 6 */
+#define HFI_PG_MIGR7_REG	0x0738	/* Page Migration Register 7 */
+#define HFI_PG_MIGR1_RESV_REG	0x0808	/* Page Migration Reservation 1 */
+#define HFI_PG_MIGR2_RESV_REG	0x0810	/* Page Migration Reservation 2 */
+#define HFI_PG_MIGR3_RESV_REG	0x0818	/* Page Migration Reservation 3 */
+#define HFI_PG_MIGR4_RESV_REG	0x0820	/* Page Migration Reservation 4 */
+#define HFI_PG_MIGR5_RESV_REG	0x0828	/* Page Migration Reservation 5 */
+#define HFI_PG_MIGR6_RESV_REG	0x0830	/* Page Migration Reservation 6 */
+#define HFI_PG_MIGR7_RESV_REG	0x0838	/* Page Migration Reservation 7 */
+#define HFI_ADDR_XLAT_WCNT_REG	0x0A20	/* Addr Xlat Wait Count */
+
+#define MASK_56_BITS	0x00ffffffffffffff
+#define MASK_40_BITS	0x000000ffffffffff
+
+#endif /* HFIDD_REGS_H */
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 21/27] HFI: Add send and receive interrupts
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Each window has its own interrupt for send interrupts and another for receive
interrupts.

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/core/Makefile       |    1 +
 drivers/net/hfi/core/hfidd_intr.c   |  127 +++++++++++++++++++++++++++++++++++
 drivers/net/hfi/core/hfidd_proto.h  |    3 +
 drivers/net/hfi/core/hfidd_window.c |   16 ++++-
 include/linux/hfi/hfidd_client.h    |   17 +++++
 include/linux/hfi/hfidd_internal.h  |    2 +
 6 files changed, 165 insertions(+), 1 deletions(-)
 create mode 100644 drivers/net/hfi/core/hfidd_intr.c

diff --git a/drivers/net/hfi/core/Makefile b/drivers/net/hfi/core/Makefile
index 3adf07e..d2ed86f 100644
--- a/drivers/net/hfi/core/Makefile
+++ b/drivers/net/hfi/core/Makefile
@@ -6,5 +6,6 @@ hfi_core-objs:=	hfidd_adpt.o \
 		hfidd_init.o \
 		hfidd_xlat.o \
 		hfidd_map.o \
+		hfidd_intr.o \
 		hfidd_hcalls.o
 obj-$(CONFIG_HFI) += hfi_core.o
diff --git a/drivers/net/hfi/core/hfidd_intr.c b/drivers/net/hfi/core/hfidd_intr.c
new file mode 100644
index 0000000..253de27
--- /dev/null
+++ b/drivers/net/hfi/core/hfidd_intr.c
@@ -0,0 +1,127 @@
+/*
+ * hfidd_intr.c
+ *
+ * HFI device driver for IBM System p
+ *
+ *  Authors:
+ *      Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *      William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *      Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *      Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *      Jian Xiao <jian@linux.vnet.ibm.com>
+ *      Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *      Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/hfi/hfidd_internal.h>
+#include "hfidd_proto.h"
+
+/* Send IRQ stub: fetches the window's adapter and always reports handled. */
+static irqreturn_t send_intr_handler(int irq, void *data)
+{
+	struct hfidd_window *win_p = data;
+	struct hfidd_acs *p_acs = hfidd_global.p_acs[win_p->ai];
+
+	/* No send-side work is done yet, with or without an adapter */
+	if (!p_acs)
+		return IRQ_HANDLED;
+	return IRQ_HANDLED;
+}
+
+/* Recv IRQ stub: fetches the window's adapter and always reports handled. */
+static irqreturn_t recv_intr_handler(int irq, void *data)
+{
+	struct hfidd_window *win_p = data;
+	struct hfidd_acs *p_acs = hfidd_global.p_acs[win_p->ai];
+
+	/* No receive-side work is done yet, with or without an adapter */
+	if (!p_acs)
+		return IRQ_HANDLED;
+	return IRQ_HANDLED;
+}
+
+/* Free IRQ int_level that was requested on behalf of win_p. */
+static inline void hfidd_clear_interrupt(unsigned int int_level,
+			struct hfidd_window *win_p)
+{
+	ibmebus_free_irq(int_level, win_p);
+}
+
+/* Request int_level from ibmebus for win_p; on failure log the error.
+ * Either way the ibmebus_request_irq() result is returned unchanged. */
+static int hfidd_init_interrupt(struct hfidd_acs *p_acs,
+		struct hfidd_window *win_p,
+		irqreturn_t (*handler)(int, void *),
+		const char *name,
+		unsigned int int_level)
+{
+	int rc = ibmebus_request_irq(int_level, handler, IRQF_DISABLED,
+			name, win_p);
+
+	if (rc)
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfidd_init_interrupt: request_irq failed for "
+			"int_level 0x%x rc %d\n", int_level, rc);
+	return rc;
+}
+
+/* Name and register the window's send IRQ, then its recv IRQ. */
+int hfidd_init_win_interrupt(struct hfidd_acs *p_acs,
+		struct hfidd_window *win_p)
+{
+	int rc;
+
+	snprintf(win_p->send_name, IRQ_NAME_SIZE - 1, "%s%d-send%d",
+		HFIDD_DEV_NAME, p_acs->index, win_p->index);
+	rc = hfidd_init_interrupt(p_acs, win_p, send_intr_handler,
+		win_p->send_name, win_p->send_intr);
+	if (rc) {
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfidd_init_win_interrupt: send int failed, "
+			"rc = 0x%x\n", rc);
+		return rc;
+	}
+
+	snprintf(win_p->recv_name, IRQ_NAME_SIZE - 1, "%s%d-recv%d",
+		HFIDD_DEV_NAME, p_acs->index, win_p->index);
+	rc = hfidd_init_interrupt(p_acs, win_p, recv_intr_handler,
+		win_p->recv_name, win_p->recv_intr);
+	if (!rc)
+		return 0;
+	/* recv failed: log it, then release the send IRQ taken above */
+	dev_printk(KERN_ERR, p_acs->hfidd_dev,
+		"hfidd_init_win_interrupt: recv int failed, "
+		"rc = 0x%x\n", rc);
+	hfidd_clear_interrupt(win_p->send_intr, win_p);
+	return rc;
+}
+
+/* Free each window IRQ whose number is non-zero, then zero that number. */
+void hfidd_clear_win_interrupt(struct hfidd_window *win_p)
+{
+	if (win_p->send_intr) {
+		hfidd_clear_interrupt(win_p->send_intr, win_p);
+		win_p->send_intr = 0;
+	}
+	if (win_p->recv_intr) {
+		hfidd_clear_interrupt(win_p->recv_intr, win_p);
+		win_p->recv_intr = 0;
+	}
+}
diff --git a/drivers/net/hfi/core/hfidd_proto.h b/drivers/net/hfi/core/hfidd_proto.h
index f531dcd..af88f0b 100644
--- a/drivers/net/hfi/core/hfidd_proto.h
+++ b/drivers/net/hfi/core/hfidd_proto.h
@@ -73,6 +73,9 @@ int hfidd_query_interface(struct hfidd_acs *p_acs, unsigned int subtype,
 int hfidd_start_nmmu(struct hfidd_acs *p_acs);
 int hfidd_start_interface(struct hfidd_acs *p_acs);
 int hfidd_stop_interface(struct hfidd_acs *p_acs, unsigned int hfi_id);
+int hfidd_init_win_interrupt(struct hfidd_acs *p_acs,
+		struct hfidd_window *win_p);
+void hfidd_clear_win_interrupt(struct hfidd_window *win_p);
 long long hfi_start_nmmu(u64 chip_id, void *nmmu_info);
 long long hfi_stop_nmmu(u64 chip_id);
 long long hfi_open_window(u64 unit_id, u64 win_id, u64 flag,
diff --git a/drivers/net/hfi/core/hfidd_window.c b/drivers/net/hfi/core/hfidd_window.c
index fd692eb..6864eae 100644
--- a/drivers/net/hfi/core/hfidd_window.c
+++ b/drivers/net/hfi/core/hfidd_window.c
@@ -1049,6 +1049,15 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 	local_p->local_isrid = p_acs->isr;
 	win_p->client_info.local_isrid = p_acs->isr;
 
+	/* Init the send and recv interrupt handlers */
+	rc = hfidd_init_win_interrupt(p_acs, win_p);
+	if (rc) {
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfidd_open_window_func: hfidd_init_win_interrupt "
+			"failed, rc = 0x%x\n", rc);
+		goto hfidd_open_window_func_err6;
+	}
+
 	/* Copy out the client info back to user */
 	rc = hfi_copy_to_user((void *)out_p, (void *)local_p,
 			is_userspace, sizeof(struct hfi_client_info));
@@ -1056,7 +1065,7 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 		dev_printk(KERN_ERR, p_acs->hfidd_dev,
 			"hfidd_open_window_func: hfi_copy_to_user "
 			"failed, rc = 0x%x\n", rc);
-		goto hfidd_open_window_func_err6;
+		goto hfidd_open_window_func_err7;
 	}
 
 	spin_lock(&(win_p->win_lock));
@@ -1068,6 +1077,8 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 	kfree(local_p);
 	return rc;
 
+hfidd_open_window_func_err7:
+	hfidd_clear_win_interrupt(win_p);
 hfidd_open_window_func_err6:
 	if (is_userspace)
 		hfidd_unmap(local_p->mmio_regs.use.kptr, PAGE_SIZE_64K);
@@ -1134,6 +1145,9 @@ int hfidd_close_window_internal(struct hfidd_acs *p_acs,
 	}
 	spin_unlock(&(win_p->win_lock));
 
+	/* Clear the send and recv interrupt handlers */
+	hfidd_clear_win_interrupt(win_p);
+
 	rc = hfi_unmap_mmio_regs(p_acs, win_p, is_userspace);
 	if (rc) {
 		dev_printk(KERN_ERR, p_acs->hfidd_dev,
diff --git a/include/linux/hfi/hfidd_client.h b/include/linux/hfi/hfidd_client.h
index 11c8973..3b2d032 100644
--- a/include/linux/hfi/hfidd_client.h
+++ b/include/linux/hfi/hfidd_client.h
@@ -121,6 +121,23 @@ struct hfi_window_info {
 	unsigned int		window;
 };
 
+/* Event Notification */
+enum hfi_event_type {
+	HFIDD_SEND		= 0,
+	HFIDD_RECV		= 1,
+	HFIDD_WIN_ERROR		= 2,
+	HFIDD_HFI_ERROR		= 3,
+	HFIDD_TERMINATE		= 4,
+	HFIDD_RELEASE_WINDOW	= 5,
+	HFIDD_CAU_ERROR		= 6,
+	HFIDD_ICS_ERROR		= 7,
+	HFIDD_HFI_READY_REG	= 8,
+	HFIDD_ROUTE_CHANGE	= 9,
+	HFIDD_IP_TRC_LVL	= 10,	/* IP Window only */
+	HFIDD_POOL_SIZE		= 11,	/* IP Window only */
+	HFIDD_NUM_EVENT_TYPES	= 12
+};
+
 #define MAX_TORRENTS            1
 #define MAX_HFI_PER_TORRENT     2
 #define MAX_HFIS                (MAX_TORRENTS * MAX_HFI_PER_TORRENT)
diff --git a/include/linux/hfi/hfidd_internal.h b/include/linux/hfi/hfidd_internal.h
index 03cac9a..a3f86b7 100644
--- a/include/linux/hfi/hfidd_internal.h
+++ b/include/linux/hfi/hfidd_internal.h
@@ -144,6 +144,8 @@ struct hfidd_global {
 	struct hfidd_acs	*p_acs[MAX_HFIS];
 };
 
+extern struct hfidd_global hfidd_global;
+
 static inline struct hfidd_window *hfi_window(struct hfidd_acs *p,
 		unsigned int idx)
 {
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 24/27] HFI: hfi_ip network driver
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

hfi_ip is built as a separate module because IP support is not strictly required in order to use the HFI.
This patch includes module load/unload and the window open/setup with the
hfi device driver.

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/Kconfig              |    1 +
 drivers/net/hfi/Makefile         |    1 +
 drivers/net/hfi/ip/Kconfig       |    9 +
 drivers/net/hfi/ip/Makefile      |    6 +
 drivers/net/hfi/ip/hf_proto.h    |   48 +++
 drivers/net/hfi/ip/hfi_ip_main.c |  613 ++++++++++++++++++++++++++++++++++++++
 include/linux/hfi/hfi_ip.h       |  148 +++++++++
 include/linux/if_arp.h           |    1 +
 8 files changed, 827 insertions(+), 0 deletions(-)
 create mode 100644 drivers/net/hfi/ip/Kconfig
 create mode 100644 drivers/net/hfi/ip/Makefile
 create mode 100644 drivers/net/hfi/ip/hf_proto.h
 create mode 100644 drivers/net/hfi/ip/hfi_ip_main.c
 create mode 100644 include/linux/hfi/hfi_ip.h

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 1abbfd9..ddae700 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -3437,5 +3437,6 @@ config VMXNET3
 	  module will be called vmxnet3.
 
 source "drivers/net/hfi/core/Kconfig"
+source "drivers/net/hfi/ip/Kconfig"
 
 endif # NETDEVICES
diff --git a/drivers/net/hfi/Makefile b/drivers/net/hfi/Makefile
index 0440cbe..768f27c 100644
--- a/drivers/net/hfi/Makefile
+++ b/drivers/net/hfi/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_HFI)                += core/
+obj-$(CONFIG_HFI_IP)             += ip/
diff --git a/drivers/net/hfi/ip/Kconfig b/drivers/net/hfi/ip/Kconfig
new file mode 100644
index 0000000..422782a
--- /dev/null
+++ b/drivers/net/hfi/ip/Kconfig
@@ -0,0 +1,9 @@
+config HFI_IP
+	tristate "IP-over-HFI"
+	depends on NETDEVICES && INET && HFI
+	---help---
+	Support for IP over HFI. It transports IP
+	packets over HFI.
+
+	To compile the driver as a module, choose M here. The module
+	will be called hfi_ip.
diff --git a/drivers/net/hfi/ip/Makefile b/drivers/net/hfi/ip/Makefile
new file mode 100644
index 0000000..90c7dea
--- /dev/null
+++ b/drivers/net/hfi/ip/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the HF IP interface for IBM eServer System p
+#
+obj-$(CONFIG_HFI_IP) += hfi_ip.o
+
+hfi_ip-objs :=	hfi_ip_main.o
diff --git a/drivers/net/hfi/ip/hf_proto.h b/drivers/net/hfi/ip/hf_proto.h
new file mode 100644
index 0000000..b4133b7
--- /dev/null
+++ b/drivers/net/hfi/ip/hf_proto.h
@@ -0,0 +1,48 @@
+/*
+ * hf_proto.h
+ *
+ * HF IP driver for IBM System p
+ *
+ *  Authors:
+ *      Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *      William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *      Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *      Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *      Jian Xiao <jian@linux.vnet.ibm.com>
+ *      Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *      Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _HF_PROTO_H_
+#define _HF_PROTO_H_
+
+extern int hfidd_open_window_func(struct hfidd_acs *p_acs,
+		u32 is_userspace,
+		struct hfi_client_info *user_p,
+		struct hfi_client_info *out_p);
+extern int hfidd_close_window_func(struct hfidd_acs *p_acs,
+		u32 is_userspace,
+		struct hfi_window_info *user_p);
+extern int hfidd_callback_register(struct hfidd_acs *p_acs,
+		struct hfi_reg_events *arg);
+extern int hfidd_callback_unregister(struct hfidd_acs *p_acs,
+		struct hfi_reg_events *arg);
+
+#endif
diff --git a/drivers/net/hfi/ip/hfi_ip_main.c b/drivers/net/hfi/ip/hfi_ip_main.c
new file mode 100644
index 0000000..0c1ebd7
--- /dev/null
+++ b/drivers/net/hfi/ip/hfi_ip_main.c
@@ -0,0 +1,613 @@
+/*
+ * hfi_ip_main.c
+ *
+ * HF IP driver for IBM System p
+ *
+ *  Authors:
+ *	Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *	William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *	Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *	Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *	Jian Xiao <jian@linux.vnet.ibm.com>
+ *	Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *	Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/hfi/hfi_ip.h>
+#include "hf_proto.h"
+
+MODULE_AUTHOR("James Dykman <dykmanj@linux.vnet.ibm.com>, "
+		"Piyush Chaudhary <piyushc@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IP driver v" HF_DRV_VERSION " (" HF_DRV_RELDATE ")"
+		" for IBM eServer HFI for System p");
+MODULE_VERSION(HF_DRV_VERSION);
+MODULE_LICENSE("GPL v2");
+
+struct hf_global_info		hf_ginfo;
+
+static const u8 hfi_bcast_addr[] = {
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+static void hf_free_tx_resource(struct hf_if *net_if)
+{
+	struct hf_fifo	*sfifo = &(net_if->tx_fifo);
+	u32		slot;
+
+	/* Drop any skbs still held in the tx array, then the array itself */
+	if (net_if->tx_skb != NULL) {
+		for (slot = 0; slot <= sfifo->emax; slot++) {
+			struct sk_buff	*skb = net_if->tx_skb[slot];
+
+			if (skb != NULL)
+				dev_kfree_skb_any(skb);
+		}
+
+		free_pages((unsigned long)(net_if->tx_skb),
+				get_order((sfifo->emax + 1) *
+				sizeof(struct sk_buff *)));
+		net_if->tx_skb = NULL;
+	}
+
+	/* The fifo allocation covers the send fifo plus one extra 4K page */
+	if (sfifo->addr != NULL) {
+		free_pages((unsigned long)(sfifo->addr),
+				get_order(sfifo->size + PAGE_SIZE_4K));
+		sfifo->addr = NULL;
+	}
+}
+
+/*
+ * Allocate and zero the send fifo (plus the extra page holding the finish
+ * vector) and the array that tracks the skb queued in each fifo slot.
+ * Returns 0 on success or -ENOMEM, with nothing left allocated on failure.
+ */
+static int hf_alloc_tx_resource(struct hf_net *net)
+{
+	struct hf_if	*net_if = &(net->hfif);
+	struct hf_fifo	*sfifo = &(net_if->tx_fifo);
+	u32		slot;
+
+	sfifo->size = HF_SFIFO_SIZE;
+	sfifo->head = 0;
+	sfifo->tail = 0;
+	sfifo->emax = HF_SFIFO_SLOTS - 1;
+	atomic_set(&sfifo->avail, HF_SFIFO_SLOTS - 1);
+
+	sfifo->addr = (void *)__get_free_pages(GFP_KERNEL,
+			get_order(sfifo->size + PAGE_SIZE_4K));
+	if (sfifo->addr == NULL) {
+		netdev_err(net->netdev, "%s: hf_alloc_tx_resource: "
+			"tx_fifo fail, size=0x%x\n",
+			net_if->name, sfifo->size);
+		return -ENOMEM;
+	}
+	memset(sfifo->addr, 0, sfifo->size + PAGE_SIZE_4K);
+
+	/* The sfifo finish vector sits in the page right after the sfifo */
+	net_if->sfifo_finishvec = sfifo->addr + sfifo->size;
+	net_if->sfifo_fv_polarity = 0;
+	net_if->sfifo_slots_per_blk = HF_SFIFO_SLOTS / HF_FV_BIT_CNT;
+
+	/* One skb pointer per send fifo slot */
+	net_if->tx_skb = (struct sk_buff **)__get_free_pages(GFP_KERNEL,
+			get_order((sfifo->emax + 1) *
+			sizeof(struct sk_buff *)));
+	if (net_if->tx_skb == NULL) {
+		netdev_err(net->netdev,
+			"%s: hf_alloc_tx_resource: tx_skb failed\n",
+			net_if->name);
+		/* Gives back the fifo pages allocated above */
+		hf_free_tx_resource(net_if);
+		return -ENOMEM;
+	}
+
+	for (slot = 0; slot <= sfifo->emax; slot++)
+		net_if->tx_skb[slot] = NULL;
+
+	return 0;
+}
+
+static void hf_free_rx_resource(struct hf_if *net_if)
+{
+	struct hf_fifo	*rfifo = &(net_if->rx_fifo);
+
+	/* Nothing to release if the receive fifo is not allocated */
+	if (rfifo->addr == NULL)
+		return;
+
+	free_pages((unsigned long)(rfifo->addr), get_order(rfifo->size));
+	rfifo->addr = NULL;
+}
+
+/* Allocate and zero the receive fifo.  Returns 0 or -ENOMEM. */
+static int hf_alloc_rx_resource(struct hf_net *net)
+{
+	struct hf_if	*net_if = &(net->hfif);
+	struct hf_fifo	*rfifo = &(net_if->rx_fifo);
+
+	rfifo->size = HF_RFIFO_SIZE;
+	rfifo->head = 0;
+	rfifo->tail = 0;
+	rfifo->emax = HF_RFIFO_SLOTS - 1;
+
+	rfifo->addr = (void *)__get_free_pages(GFP_KERNEL,
+			get_order(rfifo->size));
+	if (rfifo->addr != NULL) {
+		memset(rfifo->addr, 0, rfifo->size);
+		return 0;
+	}
+
+	netdev_err(net->netdev,
+		"%s: hf_alloc_rx_resource: fail, size=0x%x\n",
+		net_if->name, rfifo->size);
+
+	return -ENOMEM;
+}
+
+/* Release both fifos: receive side first, then transmit side. */
+static void hf_free_resource(struct hf_if *net_if)
+{
+	hf_free_rx_resource(net_if);
+	hf_free_tx_resource(net_if);
+}
+
+static int hf_alloc_resource(struct hf_net *net)
+{
+	int	rc;
+
+	/* Transmit side first; undo it if the receive side cannot be set up */
+	rc = hf_alloc_tx_resource(net);
+	if (rc == 0) {
+		rc = hf_alloc_rx_resource(net);
+		if (rc != 0)
+			hf_free_tx_resource(&(net->hfif));
+	}
+
+	return rc;
+}
+
+static int hf_close_ip_window(struct hf_net *net, struct hfidd_acs *p_acs)
+{
+	struct hf_if *net_if = &(net->hfif);
+	struct hfi_client_info	*client = &(net_if->client);
+	int		rc;
+
+	/* Drop the doorbell mapping before asking hfidd to close the window */
+	if (net_if->doorbell != NULL) {
+		iounmap(net_if->doorbell);
+		net_if->doorbell = NULL;
+	}
+
+	/* Build the close request in place in the client structure */
+	client->hdr.req		    = HFIDD_REQ_CLOSE_WINDOW;
+	client->hdr.req_len	    = sizeof(struct hfi_window_info);
+	client->hdr.result.use.kptr = client;
+
+	rc = hfidd_close_window_func(HF_ACS(net_if), 0,
+			(struct hfi_window_info *)client);
+	if (rc)
+		netdev_err(net->netdev,
+			"%s: hf_close_ip_window: fail, rc=0x%x\n",
+			net_if->name, rc);
+
+	return rc;
+}
+
+/*
+ * Open an IP window on the HFI described by p_acs, using the fifos already
+ * allocated in net->hfif, and map the window's MMIO page as the doorbell.
+ *
+ * Returns 0 on success, the hfidd error if the window cannot be opened, or
+ * -ENOMEM if the doorbell cannot be mapped (the window is closed again in
+ * that case).
+ */
+static int hf_open_ip_window(struct hf_net *net,
+			     struct hfidd_acs *p_acs)
+{
+	struct hf_if		*net_if = &(net->hfif);
+	int			rc = 0;
+
+	net_if->client.win_type = HFIDD_IP_WIN;
+
+	net_if->client.sfifo.eaddr.use.kptr	 = net_if->tx_fifo.addr;
+	net_if->client.sfifo.size		 = net_if->tx_fifo.size;
+	net_if->client.rfifo.eaddr.use.kptr	 = net_if->rx_fifo.addr;
+	net_if->client.rfifo.size		 = net_if->rx_fifo.size;
+	net_if->client.sfifo_finish_vec.use.kptr = net_if->sfifo_finishvec;
+	net_if->client.job_id			 = HF_IP_JOBID;
+
+	/* Fill in the request structure */
+	net_if->client.hdr.req		   = HFIDD_REQ_OPEN_WINDOW;
+	net_if->client.hdr.req_len	   = sizeof(struct hfi_client_info);
+	net_if->client.hdr.result.use.kptr = &(net_if->client);
+
+	rc = hfidd_open_window_func(p_acs, 0, &(net_if->client),
+			&(net_if->client));
+	if (rc) {
+		netdev_err(net->netdev,
+			"%s: hf_open_ip_window: fail open rc=0x%x\n",
+			net_if->name, rc);
+		return rc;
+	}
+
+	net_if->doorbell = (ioremap(
+		(u64)(net_if->client.mmio_regs.use.kptr), PAGE_SIZE_64K));
+
+	if (unlikely(net_if->doorbell == NULL)) {
+		netdev_err(net->netdev,
+			"%s: hf_open_ip_window: fail to map doorbell\n",
+			net_if->name);
+		/*
+		 * Undo the window open and report the failure; returning 0
+		 * here would let the caller mark the interface open with no
+		 * doorbell mapping.
+		 */
+		hf_close_ip_window(net, p_acs);
+		return -ENOMEM;
+	}
+
+	net_if->isr_id = net_if->client.local_isrid;
+
+	return 0;
+}
+
+static int hf_set_mac_addr(struct net_device *netdev, void *p)
+{
+	struct hf_net	*net = netdev_priv(netdev);
+	struct hf_if	*net_if = &(net->hfif);
+	u8		*mac = netdev->dev_addr;
+	u16		isr;
+	u16		hfi_win;
+
+	isr = (u16)(net_if->isr_id);
+	hfi_win = (u16)
+	(((net_if->ai) << HF_MAC_HFI_SHIFT) | (net_if->client.window));
+
+	/* Mac address format: 02:ClusterID:ISR:ISR:HFI_WIN:WIN */
+	mac[0] = 0x2;	/* locally administered: bit6=1, bit7=0 */
+	mac[1] = 0x0;	/* cluster id */
+
+	/* Bytes 2-3 carry the ISR id, bytes 4-5 the HFI index and window */
+	*(u16 *)(&(mac[2])) = isr;
+	*(u16 *)(&(mac[4])) = hfi_win;
+
+	return 0;
+}
+
+static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
+{
+	struct net_device	*netdev = (struct net_device *)parm;
+	struct hf_net		*net = netdev_priv(netdev);
+	struct hf_if		*net_if = &(net->hfif);
+	int			rc = 0;
+	struct hfidd_acs	*p_acs = HF_ACS(net_if);
+
+	/*
+	 * Callback registered by hf_register_hfi_ready_callback() for the
+	 * HFIDD_HFI_READY_REG event, with the net_device as its parameter.
+	 * It completes an open started by hf_net_open(): allocates the
+	 * fifos, opens the IP window and derives the MAC address, moving the
+	 * interface from HF_NET_HALF_OPEN to HF_NET_OPEN.  win and ext are
+	 * not used.  Returns 0, -EINVAL if the interface is not half open,
+	 * or the error of the step that failed.
+	 *
+	 * NOTE(review): hf_alloc_resource() allocates with GFP_KERNEL and
+	 * hf_open_ip_window() calls ioremap() while the net_if->lock
+	 * spinlock is held; both can sleep -- TODO confirm and rework the
+	 * locking if so.
+	 */
+	spin_lock(&(net_if->lock));
+	if (net_if->state != HF_NET_HALF_OPEN) {
+		netdev_err(netdev, "hf_net_delayed_open: net_if state=0x%x\n",
+			net_if->state);
+		spin_unlock(&(net_if->lock));
+		return -EINVAL;
+	}
+
+	rc = hf_alloc_resource(net);
+	if (rc)
+		goto delayed_open_err0;
+
+	rc = hf_open_ip_window(net, p_acs);
+	if (rc)
+		goto delayed_open_err1;
+
+	hf_set_mac_addr(netdev, NULL);
+
+	net_if->state = HF_NET_OPEN;
+	spin_unlock(&(net_if->lock));
+
+	return 0;
+
+delayed_open_err1:
+	hf_free_resource(net_if);
+
+delayed_open_err0:
+	spin_unlock(&(net_if->lock));
+
+	return rc;
+}
+
+/*
+ * Register or unregister hf_net_delayed_open() as the handler for the
+ * HFIDD_HFI_READY_REG event on the given adapter.
+ *
+ * flag is HFIDD_REQ_EVENT_REGISTER to register; any other value
+ * unregisters.  Returns 0 or the error reported by the hfidd core.
+ */
+static int hf_register_hfi_ready_callback(struct net_device *netdev,
+					  struct hfidd_acs *p_acs,
+					  int flag)
+{
+	struct hfi_reg_events	reg_events;
+	int			rc = 0;
+
+	/*
+	 * Start from a zeroed request so fields not set below (e.g. window)
+	 * do not reach the hfidd core as stack garbage.
+	 */
+	memset(&reg_events, 0, sizeof(reg_events));
+
+	reg_events.hdr.req    = flag;
+	reg_events.hdr.req_len = sizeof(struct hfi_reg_events);
+	reg_events.hdr.result.use.kptr = NULL;
+	reg_events.type	= FUNCTIONS_FOR_EVENTS;
+
+	reg_events.info.func.index = HFIDD_HFI_READY_REG;
+	reg_events.info.func.function_p.use.kptr = hf_net_delayed_open;
+	reg_events.info.func.parameter.use.kptr  = (void *)(netdev);
+
+	if (flag == HFIDD_REQ_EVENT_REGISTER)
+		rc = hfidd_callback_register(p_acs, &reg_events);
+	else
+		rc = hfidd_callback_unregister(p_acs, &reg_events);
+	if (rc) {
+		netdev_err(netdev, "hf_register_hfi_ready_callback: fail"
+			" flag=0x%x rc=0x%x\n", flag, rc);
+
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * ndo_open handler.  Resets the statistics, marks the interface
+ * HF_NET_HALF_OPEN with the carrier off, and registers
+ * hf_net_delayed_open() for the HFI-ready event; that callback does the
+ * rest of the open.  On registration failure the state goes back to
+ * HF_NET_CLOSE and the error is returned.
+ */
+static int hf_net_open(struct net_device *netdev)
+{
+	struct hf_net		*net = netdev_priv(netdev);
+	struct hf_if		*net_if = &(net->hfif);
+	int			rc = 0;
+	struct hfidd_acs	*p_acs = HF_ACS(net_if);
+
+	memset(&(netdev->stats), 0, sizeof(struct net_device_stats));
+	net_if->sfifo_packets = 0;
+
+	spin_lock(&(net_if->lock));
+	net_if->state = HF_NET_HALF_OPEN;
+	spin_unlock(&(net_if->lock));
+
+	netif_carrier_off(netdev);
+
+	rc = hf_register_hfi_ready_callback(netdev, p_acs,
+			HFIDD_REQ_EVENT_REGISTER);
+	if (rc != 0) {
+		spin_lock(&(net_if->lock));
+		net_if->state = HF_NET_CLOSE;
+		spin_unlock(&(net_if->lock));
+
+		/* Message pieces need a separating space and a newline */
+		netdev_err(netdev, "hf_net_open: hf_register_hfi_ready_callback "
+			"fail, rc=0x%x, state=0x%x\n", rc, net_if->state);
+		return rc;
+	}
+
+	return 0;
+}
+
+static int hf_net_close(struct net_device *netdev)
+{
+	struct hf_net		*net = netdev_priv(netdev);
+	struct hf_if		*net_if = &(net->hfif);
+	struct hfidd_acs	*p_acs = HF_ACS(net_if);
+	/*
+	 * ndo_stop handler.  If the delayed open completed, closes the IP
+	 * window and frees the fifos; in every case unregisters the
+	 * HFI-ready callback and marks the interface HF_NET_CLOSE.  Always
+	 * returns 0; errors from the helpers are not propagated.
+	 *
+	 * NOTE(review): hf_close_ip_window() calls iounmap() and the hfidd
+	 * close path while the net_if->lock spinlock is held -- TODO confirm
+	 * none of that can sleep.
+	 */
+
+	spin_lock(&(net_if->lock));
+	if (net_if->state == HF_NET_OPEN) {
+		hf_close_ip_window(net, p_acs);
+
+		hf_free_resource(net_if);
+	}
+
+	hf_register_hfi_ready_callback(netdev, p_acs,
+			HFIDD_REQ_EVENT_UNREGISTER);
+
+	net_if->state = HF_NET_CLOSE;
+	spin_unlock(&(net_if->lock));
+
+	return 0;
+}
+
+/*
+ * ndo_change_mtu handler.  Accepts any MTU from 68 (the minimum IPv4 MTU)
+ * up to HF_NET_MTU inclusive; returns -ERANGE otherwise.
+ */
+static int hf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	/* 68 itself is a valid MTU, so only values below it are rejected */
+	if ((new_mtu < 68) || (new_mtu > HF_NET_MTU))
+		return -ERANGE;
+
+	netdev->mtu = new_mtu;
+
+	return 0;
+}
+
+/*
+ * header_ops .create handler: prepend an Ethernet-style header to skb.
+ * Returns hard_header_len when the destination could be filled in (from
+ * daddr, or zeroed for IFF_NOARP devices) and -hard_header_len when it
+ * is still unknown.
+ */
+static int hf_hard_header(struct sk_buff *skb,
+			  struct net_device *netdev,
+			  u16 type,
+			  const void *daddr,
+			  const void *saddr,
+			  u32 len)
+{
+	struct ethhdr		*eth;
+	const void		*src = saddr ? saddr : netdev->dev_addr;
+
+	eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+	eth->h_proto = htons(type);
+
+	/* Source defaults to the device's own address */
+	memcpy(eth->h_source, src, netdev->addr_len);
+
+	if (daddr != NULL)
+		memcpy(eth->h_dest, daddr, netdev->addr_len);
+	else if (netdev->flags & IFF_NOARP)
+		memset(eth->h_dest, 0, netdev->addr_len);
+	else
+		return -netdev->hard_header_len;
+
+	return netdev->hard_header_len;
+}
+
+static const struct header_ops hf_header_ops = {
+	.create = hf_hard_header,
+};
+
+static const struct net_device_ops hf_netdev_ops = {
+	.ndo_open		= hf_net_open,
+	.ndo_stop		= hf_net_close,
+	.ndo_change_mtu		= hf_change_mtu,
+	.ndo_set_mac_address	= NULL,
+};
+
+/* alloc_netdev() setup callback: fill in the static netdev attributes. */
+static void hf_if_setup(struct net_device *netdev)
+{
+	/* Operation tables */
+	netdev->netdev_ops	= &hf_netdev_ops;
+	netdev->header_ops	= &hf_header_ops;
+
+	/* Link-level framing: Ethernet sized header and addresses */
+	netdev->type		= ARPHRD_HFI;
+	netdev->hard_header_len	= ETH_HLEN;
+	netdev->addr_len	= ETH_ALEN;
+	netdev->needed_headroom	= 0;
+	memcpy(netdev->broadcast, hfi_bcast_addr, ETH_ALEN);
+
+	/* Interface defaults */
+	netdev->flags		= IFF_BROADCAST;
+	netdev->mtu		= HF_NET_MTU;
+	netdev->tx_queue_len	= 1000;
+}
+
+/*
+ * Allocate and register the net_device for interface idx on adapter ai.
+ * The device is named "hf<N>" with N = idx * MAX_HFIS + ai.
+ *
+ * Returns the hf_net private area of the new device, or an ERR_PTR():
+ * -ENOMEM if the net_device cannot be allocated, otherwise the error
+ * returned by register_netdev().
+ */
+static struct hf_net *hf_init_netdev(int idx, int ai)
+{
+	struct net_device	*netdev;
+	struct hf_net		*net;
+	int			ii;
+	int			rc;
+	char			ifname[HF_MAX_NAME_LEN];
+
+	ii = (idx * MAX_HFIS) + ai;
+	/* Bounded formatting; always NUL-terminates ifname */
+	snprintf(ifname, sizeof(ifname), "hf%d", ii);
+	netdev = alloc_netdev(sizeof(struct hf_net), ifname, hf_if_setup);
+	if (!netdev) {
+		printk(KERN_ERR "hf_init_netdev: "
+				"alloc_netdev for hfi%d:hf%d fail\n", ai, idx);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	net = netdev_priv(netdev);
+	net->netdev = netdev;
+
+	memset(&(net->hfif), 0, sizeof(struct hf_if));
+	net->hfif.idx = ii;	/* interface index */
+	net->hfif.ai  = ai;	/* adapter index */
+	strncpy(net->hfif.name, ifname, HF_MAX_NAME_LEN);
+	net->hfif.state = HF_NET_CLOSE;
+
+	spin_lock_init(&net->hfif.lock);
+
+	rc = register_netdev(netdev);
+	if (rc) {
+		netdev_err(netdev, "hf_init_netdev: "
+				"failed to register netdev=hfi%d:hf%d, "
+				"rc = 0x%x\n", ai, idx, rc);
+		free_netdev(netdev);
+		/* Hand the real reason back to the caller */
+		return ERR_PTR(rc);
+	}
+
+	return net;
+}
+
+/* Unregister and free the net_device that owns this hf_net. */
+static void hf_del_netdev(struct hf_net *net)
+{
+	struct net_device	*dev = net->netdev;
+
+	unregister_netdev(dev);
+	free_netdev(dev);
+}
+
+/*
+ * inetaddr notifier: on NETDEV_UP for one of our devices in init_net,
+ * record the first address on the device's list (host byte order) in
+ * hf_if.ip_addr.  Always returns NOTIFY_DONE.
+ */
+static int hf_inet_event(struct notifier_block *this,
+			 unsigned long event,
+			 void *ifa)
+{
+	struct in_device	*idev = ((struct in_ifaddr *)ifa)->ifa_dev;
+	struct net_device	*dev = idev->dev;
+	struct hf_net		*net;
+
+	if (!net_eq(dev_net(dev), &init_net))
+		return NOTIFY_DONE;
+
+	if (event != NETDEV_UP)
+		return NOTIFY_DONE;
+
+	/* Only devices created by this driver use hf_netdev_ops */
+	if (dev->netdev_ops != &hf_netdev_ops)
+		return NOTIFY_DONE;
+
+	net = (struct hf_net *)(netdev_priv(dev));
+	net->hfif.ip_addr = ntohl(idev->ifa_list->ifa_address);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block hf_inet_notifier = {
+	.notifier_call = hf_inet_event,
+};
+
+/*
+ * Module init: create one net_device per (interface, adapter) pair and
+ * hook the inet address notifier.  If any device cannot be created, the
+ * ones created so far are removed and the error is returned.
+ */
+static int __init hf_init_module(void)
+{
+	u32		idx, ai;
+	int		rc;
+	struct hf_net	*net;
+
+	memset(&hf_ginfo, 0, sizeof(struct hf_global_info));
+
+	for (idx = 0; idx < MAX_HF_PER_HFI; idx++) {
+		for (ai = 0; ai < MAX_HFIS; ai++) {
+			net = hf_init_netdev(idx, ai);
+			if (!IS_ERR(net)) {
+				hf_ginfo.net[idx][ai] = net;
+				continue;
+			}
+
+			printk(KERN_ERR "hf_init_module: hf_init_netdev"
+					" for idx %d ai %d failed rc"
+					" %ld\n",
+					idx, ai, PTR_ERR(net));
+			rc = PTR_ERR(net);
+			goto unwind;
+		}
+	}
+
+	register_inetaddr_notifier(&hf_inet_notifier);
+
+	printk(KERN_INFO "hfi_ip module loaded\n");
+	return 0;
+
+unwind:
+	/* Tear down every device that was registered before the failure */
+	for (idx = 0; idx < MAX_HF_PER_HFI; idx++) {
+		for (ai = 0; ai < MAX_HFIS; ai++) {
+			net = hf_ginfo.net[idx][ai];
+			if (net == NULL)
+				continue;
+
+			hf_del_netdev(net);
+			hf_ginfo.net[idx][ai] = NULL;
+		}
+	}
+
+	return rc;
+}
+
+/* Module exit: drop the inet notifier, then remove every net_device. */
+static void __exit hf_cleanup_module(void)
+{
+	u32		idx, ai;
+	struct hf_net	*net;
+
+	unregister_inetaddr_notifier(&hf_inet_notifier);
+
+	for (idx = 0; idx < MAX_HF_PER_HFI; idx++) {
+		for (ai = 0; ai < MAX_HFIS; ai++) {
+			net = hf_ginfo.net[idx][ai];
+			if (net == NULL)
+				continue;
+
+			hf_del_netdev(net);
+			hf_ginfo.net[idx][ai] = NULL;
+		}
+	}
+}
+
+module_init(hf_init_module);
+module_exit(hf_cleanup_module);
diff --git a/include/linux/hfi/hfi_ip.h b/include/linux/hfi/hfi_ip.h
new file mode 100644
index 0000000..6b6a74c
--- /dev/null
+++ b/include/linux/hfi/hfi_ip.h
@@ -0,0 +1,148 @@
+/*
+ * hfi_ip.h
+ *
+ * HF IP driver for IBM System p
+ *
+ *  Authors:
+ *      Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *      William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *      Wen C. Chen <wcchen@linux.vnet.ibm.com>
+ *      Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *      Jian Xiao <jian@linux.vnet.ibm.com>
+ *      Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *      Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _HFI_IP_H_
+#define _HFI_IP_H_
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <net/arp.h>
+
+#include <linux/hfi/hfidd_internal.h>
+#include <linux/hfi/hfidd_client.h>
+#include <linux/hfi/hfidd_requests.h>
+#include <linux/hfi/hfidd_pkt_formats.h>
+
+#define HF_DRV_VERSION			"1.0"
+#define HF_DRV_RELDATE			"July 7, 2010"
+#define HF_DRV_NAME			"hf"
+
+#define MAX_HF_PER_HFI			2
+#define	HF_IP_JOBID			0xFFFFFFF0
+#define HF_MAX_NAME_LEN			64
+
+#define HF_SFIFO_SIZE			0x40000	/* 256K */
+#define HF_SFIFO_SLOTS			(HF_SFIFO_SIZE >> HFI_CACHE_LINE_SHIFT)
+#define HF_RFIFO_SIZE			0x1000000	/* 16M */
+#define HF_RFIFO_SLOTS			(HF_RFIFO_SIZE >> HFI_CACHE_LINE_SHIFT)
+
+#define HF_FV_BIT_CNT			32
+
+#define HF_NET_MTU			(2048 - HF_IP_HDR_LEN - HF_PROTO_LEN)
+
+struct hfi_ip_extended_hdr {            /* 16B */
+	unsigned int	immediate_len:7;/* In bytes */
+	unsigned int	num_desc:3;     /* number of descriptors */
+					/* Logical Port ID: */
+	unsigned int	lpid_valid:1;   /* set by sending HFI */
+	unsigned int	lpid:4;         /* set by sending HFI */
+	/* Ethernet Service Header is 113 bits, which is 14 bytes + 1 bit */
+	unsigned int	ethernet_svc_hdr_hi:1;    /* Not used by HFI */
+	char            ethernet_svc_hdr[12];     /* Not used by HFI */
+	__sum16         bcast_csum;
+} __packed;
+
+struct hfi_ip_with_payload_pkt {
+	struct hfi_hdr			hfi_hdr;
+	struct hfi_ip_extended_hdr	ip_ext;
+	char				payload[2016];
+} __packed;
+
+#define HF_IP_HDR_LEN			((sizeof(struct hfi_hdr) + \
+				sizeof(struct hfi_ip_extended_hdr)))
+#define HF_ALIGN_PAD			2
+
+struct hf_if_proto_hdr {
+	u16			version;
+	u8			msg_type;
+	u8			msg_flag;
+	u32			msg_len;	/* Include HFI header */
+	u32			msg_id;
+};
+
+#define HF_PROTO_LEN		sizeof(struct hf_if_proto_hdr)
+
+struct hf_fifo {
+	void			*addr;
+	u32			size;		/* total bytes	*/
+	u32			head;
+	u32			tail;
+	u32			emax;		/* power 2 mask */
+	atomic_t		avail;		/* for tx	*/
+	atomic_t		outstanding;	/* for rx	*/
+};
+
+#define	HF_NET_CLOSE		0x00
+#define	HF_NET_HALF_OPEN	0xA0
+#define	HF_NET_OPEN		0xA1
+
+struct hf_if {
+	u32			idx;			/* 0, 1, 2, 3 ...   */
+	u32			ai;			/* 0=hfi0, 1=hfi1   */
+	char			name[HF_MAX_NAME_LEN];
+	u32			isr_id;
+	u32			ip_addr;
+	u32			state;			/* CLOSE, OPEN */
+	spinlock_t		lock;			/* lock for state */
+	u32			sfifo_fv_polarity;
+	u32			sfifo_slots_per_blk;
+	u32			sfifo_packets;
+	void __iomem		*doorbell;		/* mapped mmio_regs */
+	struct hf_fifo		tx_fifo;
+	struct hf_fifo		rx_fifo;
+	struct hfi_client_info	client;
+	struct sk_buff		**tx_skb;		/* array to store tx
+							   2k skb */
+	void			*sfifo_finishvec;
+};
+
+/* Private structure for HF interface */
+struct hf_net {
+	struct net_device	*netdev;
+	struct hf_if		hfif;
+};
+
+extern struct hfidd_global	hfidd_global;
+
+#define HF_ACS(net_if)		(hfidd_global.p_acs[(net_if)->ai])
+
+struct hf_global_info {
+	struct hf_net		*net[MAX_HF_PER_HFI][MAX_HFI_PER_TORRENT];
+};
+
+extern struct hf_global_info	hf_ginfo;
+
+#define HF_MAC_HFI_SHIFT	12
+#endif
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 6d722f4..f2cfdc1 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -41,6 +41,7 @@
 #define	ARPHRD_IEEE1394	24		/* IEEE 1394 IPv4 - RFC 2734	*/
 #define ARPHRD_EUI64	27		/* EUI-64                       */
 #define ARPHRD_INFINIBAND 32		/* InfiniBand			*/
+#define ARPHRD_HFI	37		/* Host Fabric Interface	*/
 
 /* Dummy types for non ARP hardware */
 #define ARPHRD_SLIP	256
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 25/27] HFI: hfi_ip fifo transmit paths
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/ip/hf_proto.h    |    1 +
 drivers/net/hfi/ip/hfi_ip_main.c |  438 ++++++++++++++++++++++++++++++++++++++
 include/linux/hfi/hfi_ip.h       |   72 ++++++-
 3 files changed, 510 insertions(+), 1 deletions(-)

diff --git a/drivers/net/hfi/ip/hf_proto.h b/drivers/net/hfi/ip/hf_proto.h
index b4133b7..b0232ab 100644
--- a/drivers/net/hfi/ip/hf_proto.h
+++ b/drivers/net/hfi/ip/hf_proto.h
@@ -33,6 +33,7 @@
 #ifndef _HF_PROTO_H_
 #define _HF_PROTO_H_
 
+int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls);
 extern int hfidd_open_window_func(struct hfidd_acs *p_acs,
 		u32 is_userspace,
 		struct hfi_client_info *user_p,
diff --git a/drivers/net/hfi/ip/hfi_ip_main.c b/drivers/net/hfi/ip/hfi_ip_main.c
index 0c1ebd7..689f92e 100644
--- a/drivers/net/hfi/ip/hfi_ip_main.c
+++ b/drivers/net/hfi/ip/hfi_ip_main.c
@@ -185,6 +185,87 @@ alloc_resource_err0:
 	return rc;
 }
 
+/*
+ * Send-interrupt callback (registered for HFIDD_SEND with the hf_net as
+ * parameter).  Reads the send interrupt status, masks the send fifo
+ * interrupt if that is what fired, and wakes the transmit queue.
+ * win and ext are not used.  Always returns 0.
+ */
+static int hf_send_intr_callback(void *parm, u32 win, u32 ext)
+{
+	struct hf_net	*net = (struct hf_net *)parm;
+	struct hf_if	*net_if = &(net->hfif);
+	u64		sintr_status;
+
+	sintr_status = hf_mmio_regs_read(net_if, HFI_SINTR_STATUS_REG);
+
+	/* Terminate the record so it is not merged with the next message */
+	netdev_info(net->netdev, "hf_send_intr_callback: "
+		"sintr_status 0x%016llx\n", sintr_status);
+
+	/* mask off the interrupt */
+	if (sintr_status & HF_SFIFO_INTR_EVENT)
+		hf_mmio_regs_write(net_if, HFI_SFIFO_INTR_CNTL, 0);
+
+	/* Make sure interrupts are masked */
+	/* Otherwise, once the queue is awake, it could see a stale interrupt */
+	mb();
+
+	netif_wake_queue(net->netdev);
+
+	return 0;
+}
+
+struct hf_events_cb hf_events[HF_EVENT_NUM] = {
+	{HFIDD_SEND,		(void *)hf_send_intr_callback},
+};
+
+/*
+ * Register or unregister every callback listed in hf_events[] for this
+ * interface's window.
+ *
+ * flag is HFIDD_REQ_EVENT_REGISTER to register; any other value
+ * unregisters.  When registering, the first failure rolls back the
+ * events already registered and returns that failure's error code.  When
+ * unregistering, failures are logged, the remaining events are still
+ * processed, and the result of the last call is returned.
+ */
+static int hf_register_ip_events(struct hf_net *net,
+				 struct hfidd_acs *p_acs,
+				 int flag)
+{
+	struct hf_if		*net_if = &(net->hfif);
+	int			rc = 0, i, j;
+	struct hfi_reg_events	events[HF_EVENT_NUM];
+	int			(*reg_func)(struct hfidd_acs *,
+				struct hfi_reg_events *);
+
+	if (flag == HFIDD_REQ_EVENT_REGISTER)
+		reg_func = hfidd_callback_register;
+	else
+		reg_func = hfidd_callback_unregister;
+
+	for (i = 0; i < HF_EVENT_NUM; i++) {
+		events[i].window = net_if->client.window;
+		events[i].type = FUNCTIONS_FOR_EVENTS;
+		events[i].info.func.index = hf_events[i].type;
+		events[i].info.func.function_p.use.kptr = hf_events[i].func;
+		events[i].info.func.parameter.use.kptr = (void *)(net);
+
+		events[i].hdr.req = flag;
+		events[i].hdr.req_len = sizeof(struct hfi_reg_events);
+		events[i].hdr.result.use.kptr = &(events[i]);
+
+		rc = reg_func(p_acs, &(events[i]));
+		if (rc) {
+			netdev_err(net->netdev, "hf_register_ip_events: "
+				"fail event 0x%x, flag=0x%x rc=0x%x\n",
+				hf_events[i].type, flag, rc);
+
+			if (flag == HFIDD_REQ_EVENT_REGISTER)
+				goto err_out;
+		}
+	}
+
+	return rc;
+
+err_out:
+	/*
+	 * Undo events [0, i), which registered successfully.  Use a separate
+	 * result variable so the original registration error in rc is what
+	 * the caller sees.
+	 */
+	for (j = 0; j < i; j++) {
+		int	unreg_rc;
+
+		events[j].hdr.req = HFIDD_REQ_EVENT_UNREGISTER;
+		unreg_rc = hfidd_callback_unregister(p_acs, &(events[j]));
+		if (unreg_rc) {
+			netdev_err(net->netdev, "hf_register_ip_events: failed "
+				"to unregister callback event 0x%x, rc=0x%x\n",
+				events[j].info.func.index, unreg_rc);
+		}
+	}
+
+	return rc;
+}
+
 static int hf_close_ip_window(struct hf_net *net, struct hfidd_acs *p_acs)
 {
 	struct hf_if *net_if = &(net->hfif);
@@ -276,6 +357,16 @@ static int hf_set_mac_addr(struct net_device *netdev, void *p)
 	return 0;
 }
 
+/* Program the window registers that must be set before traffic starts. */
+static void hf_init_hw_regs(struct hf_if *net_if)
+{
+	/* setup IP with payload threshold in cache line size */
+	hf_mmio_regs_write(net_if, HFI_IP_RECV_SIZE,
+		(HF_PAYLOAD_RX_THRESHOLD << HF_PAYLOAD_RX_THRESH_SHIFT));
+
+	/* initialize SEND INTR STATUS (written as 0) */
+	hf_mmio_regs_write(net_if, HFI_SINTR_STATUS_REG, 0);
+}
+
 static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
 {
 	struct net_device	*netdev = (struct net_device *)parm;
@@ -300,13 +391,25 @@ static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
 	if (rc)
 		goto delayed_open_err1;
 
+	rc = hf_register_ip_events(net, p_acs, HFIDD_REQ_EVENT_REGISTER);
+	if (rc)
+		goto delayed_open_err2;
+
 	hf_set_mac_addr(netdev, NULL);
 
+	hf_init_hw_regs(net_if);
+
 	net_if->state = HF_NET_OPEN;
 	spin_unlock(&(net_if->lock));
 
+	netif_carrier_on(netdev);
+	netif_start_queue(netdev);
+
 	return 0;
 
+delayed_open_err2:
+	hf_close_ip_window(net, p_acs);
+
 delayed_open_err1:
 	hf_free_resource(net_if);
 
@@ -385,6 +488,11 @@ static int hf_net_close(struct net_device *netdev)
 
 	spin_lock(&(net_if->lock));
 	if (net_if->state == HF_NET_OPEN) {
+		netif_stop_queue(netdev);
+		netif_carrier_off(netdev);
+
+		hf_register_ip_events(net, p_acs, HFIDD_REQ_EVENT_UNREGISTER);
+
 		hf_close_ip_window(net, p_acs);
 
 		hf_free_resource(net_if);
@@ -399,6 +507,332 @@ static int hf_net_close(struct net_device *netdev)
 	return 0;
 }
 
+/*
+ * Give finished send fifo blocks back to the transmit path.
+ *
+ * The send fifo is handled in blocks of sfifo_slots_per_blk slots.  Block b
+ * is represented by bit (HF_FV_BIT_MAX - b) of the finish vector.  Starting
+ * at the block that holds tx_fifo.head, a block is reclaimed for as long as
+ * its bit differs from sfifo_fv_polarity: any skb still recorded in tx_skb[]
+ * for its slots is freed and the slots are added to tx_fifo.avail.  After
+ * the last block (HF_FV_BIT_CNT) the walk restarts at block 0 with the
+ * polarity inverted.
+ */
+static void hf_tx_recycle(struct hf_if *net_if)
+{
+	const u32	blk_slots = net_if->sfifo_slots_per_blk;
+	u32		*finish_vec = (u32 *)(net_if->sfifo_finishvec);
+	u32		slot = net_if->tx_fifo.head;
+	u32		blk = slot / blk_slots;
+
+	for (;;) {
+		u8	nr = HF_FV_BIT_MAX - blk;
+		u32	fv_bit;
+		u32	n;
+
+		/* one read of the finish vector per block examined */
+		fv_bit = BIT(nr) & (ACCESS_ONCE(*finish_vec));
+		fv_bit >>= nr;
+
+		/* bit equal to the current polarity: nothing more to reclaim */
+		if (fv_bit == net_if->sfifo_fv_polarity)
+			break;
+
+		for (n = 0; n < blk_slots; n++) {
+			struct sk_buff	*skb = net_if->tx_skb[slot + n];
+
+			if (skb == NULL)
+				continue;
+
+			dev_kfree_skb_any(skb);
+			net_if->tx_skb[slot + n] = NULL;
+		}
+
+		slot = (slot + blk_slots) & (net_if->tx_fifo.emax);
+		atomic_add(blk_slots, &(net_if->tx_fifo.avail));
+
+		blk++;
+		if (blk == HF_FV_BIT_CNT) {
+			blk = 0;
+			net_if->sfifo_fv_polarity ^= 1;
+		}
+	}
+
+	net_if->tx_fifo.head = slot;
+}
+
+/*
+ * Check that the send fifo has room for xmit_cls cache lines.
+ *
+ * If tx_fifo.avail is too small, finished blocks are recycled once and the
+ * count is checked again.  When there is still no room the queue is stopped
+ * and the send fifo interrupt is turned on, with a threshold derived from
+ * sfifo_packets and HF_SFIFO_INTR_WATERMARK.
+ *
+ * Returns 0 when there is room, -EBUSY otherwise.
+ */
+int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls)
+{
+	struct hf_if	*net_if = &(net->hfif);
+	u64		intr_thresh;
+	u32		intr_cntl;
+
+	if (atomic_read(&net_if->tx_fifo.avail) >= xmit_cls)
+		return 0;
+
+	hf_tx_recycle(net_if);
+
+	if (atomic_read(&net_if->tx_fifo.avail) >= xmit_cls)
+		return 0;
+
+	netif_stop_queue(net->netdev);
+
+	/* turn on transmit interrupt */
+	intr_thresh = net_if->sfifo_packets - HF_SFIFO_INTR_WATERMARK;
+	intr_thresh &= HF_SFIFO_INTR_MASK;
+
+	intr_cntl = HF_SFIFO_INTR_ENABLE |
+			(intr_thresh << HF_SFIFO_INTR_CNT_SHIFT);
+
+	hf_mmio_regs_write_then_read(net_if, HFI_SFIFO_INTR_CNTL, intr_cntl);
+
+	return -EBUSY;
+}
+/* Always selects HFI_HW_DIRECT_ROUTE; dst_isr is currently not used. */
+static inline void hf_fill_route(u16 dst_isr, struct base_hdr *base_hdr_p)
+{
+	base_hdr_p->route_control = HFI_HW_DIRECT_ROUTE;
+}
+
+/*
+ * hf_copy_skb_to_fifo - copy skb data into the send fifo
+ * @net:	interface that owns the fifo
+ * @skb:	source packet
+ * @dst:	first byte to write inside the send fifo
+ * @len:	number of bytes to copy
+ * @offset:	offset into the skb data to start copying from
+ *
+ * The fifo is circular: when fewer than @len bytes are left between @dst
+ * and the end of the fifo, the remainder is written at the start of the
+ * fifo.
+ *
+ * Returns 0 on success, or the error returned by skb_copy_bits().
+ */
+static int hf_copy_skb_to_fifo(struct hf_net *net,
+				struct sk_buff *skb,
+				char *dst,
+				u32 len,
+				u32 offset)
+{
+	struct hf_if *net_if = &(net->hfif);
+	u64		fifo_end;
+	u32		tail_room;
+	int		rc;
+
+	fifo_end = (u64)(net_if->tx_fifo.addr) + net_if->tx_fifo.size;
+
+	/* bytes available before the copy has to wrap */
+	tail_room = fifo_end - (u64)dst;
+	if (tail_room >= len) {
+		rc = skb_copy_bits(skb, offset, dst, len);
+		if (rc) {
+			netdev_err(net->netdev,
+				"hf_copy_skb_to_fifo: skb_copy_bits "
+				"fail1 offset=0x%x, len=0x%x, rc=0x%x\n",
+				offset, len, rc);
+			return rc;
+		}
+	} else {
+		/* first part: up to the end of the fifo */
+		rc = skb_copy_bits(skb, offset, dst, tail_room);
+		if (rc) {
+			netdev_err(net->netdev,
+				"hf_copy_skb_to_fifo: skb_copy_bits "
+				"fail2 offset=0x%x, len=0x%x, rc=0x%x\n",
+				offset, tail_room, rc);
+
+			return rc;
+		}
+		/* second part: continue at the start of the fifo */
+		rc = skb_copy_bits(skb, offset + tail_room,
+				net_if->tx_fifo.addr, len - tail_room);
+		if (rc) {
+			netdev_err(net->netdev,
+				"hf_copy_skb_to_fifo: skb_copy_bits "
+				"fail3 offset=0x%x, len=0x%x, rc=0x%x\n",
+				offset + tail_room, len - tail_room, rc);
+
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+/* Build base_hdr and proto_hdr for payload pkt at the current tx_fifo tail.
+ *
+ * msg_type is taken from the h_proto of the hw header: HF_IF_FIFO for IP,
+ * HF_IF_ARP for ARP.  Any other protocol is logged, the skb is freed here
+ * and NULL is returned, so the caller must not touch the skb afterwards.
+ *
+ * For broadcast the fixed HFIDD_DST_BCST_ISR/HFIDD_DST_BCST_WIN destination
+ * and the multicast header type are used; otherwise the destination is
+ * decoded from the destination MAC by hf_get_dst_info().
+ *
+ * Return pointer to the end of proto_hdr (where the caller copies the
+ * payload), or NULL when the skb was dropped. */
+static char *hf_build_payload_hdr(struct hf_net *net,
+				  struct sk_buff *skb,
+				  u32 msg_len,
+				  u32 xmit_cls,
+				  u32 is_bcast)
+{
+	struct hf_if			*net_if = &(net->hfif);
+	struct hf_if_proto_hdr		*proto_hdr_p;
+	struct hfi_ip_with_payload_pkt	*hdr_p;
+	char				*dst;
+	u8				msg_type, msg_flag;
+	struct ethhdr			*hwhdr_p;
+
+	hwhdr_p = (struct ethhdr *)(skb->data);
+
+	if (hwhdr_p->h_proto == htons(ETH_P_IP))
+		msg_type = HF_IF_FIFO;
+	else if (hwhdr_p->h_proto == htons(ETH_P_ARP))
+		msg_type = HF_IF_ARP;
+	else {
+		netdev_err(net->netdev, "hf_build_payload_hdr: h_proto = 0x%x "
+			" not supported\n", hwhdr_p->h_proto);
+
+		dev_kfree_skb_any(skb);
+		return NULL;
+	}
+
+	dst = net_if->tx_fifo.addr +
+		(net_if->tx_fifo.tail << HFI_CACHE_LINE_SHIFT);
+
+	/* fill in base_hdr + ip_extended_hdr */
+	hdr_p = (struct hfi_ip_with_payload_pkt *)dst;
+
+	/* Do not memset over one cacheline since it might wrap */
+	memset(hdr_p, 0, HF_IP_HDR_LEN);
+
+	hdr_p->hfi_hdr.type.header_type = HFI_IP_WITH_PAYLOAD;
+	hdr_p->hfi_hdr.id.job_id = net_if->client.job_id;
+
+	if (is_bcast) {
+		hdr_p->hfi_hdr.base_hdr.dst_isr = HFIDD_DST_BCST_ISR;
+		hdr_p->hfi_hdr.base_hdr.dst_win = HFIDD_DST_BCST_WIN;
+		hdr_p->hfi_hdr.type.header_type = HFI_IP_MULTICAST_WITH_PAYLOAD;
+
+		msg_flag = HF_IF_BCAST;
+	} else {
+		u16	dst_isr, dst_win;
+
+		hf_get_dst_info(hwhdr_p, &dst_isr, &dst_win);
+		hdr_p->hfi_hdr.base_hdr.dst_isr = dst_isr;
+		hdr_p->hfi_hdr.base_hdr.dst_win = dst_win;
+
+		hf_fill_route(dst_isr, &(hdr_p->hfi_hdr.base_hdr));
+
+		msg_flag = HF_IF_UCAST;
+	}
+
+	netdev_dbg(net->netdev, "hf_build_payload_hdr: dst_isr = 0x%x, "
+			"dst_win = 0x%x, xmit_cls = 0x%x\n",
+			hdr_p->hfi_hdr.base_hdr.dst_isr,
+			hdr_p->hfi_hdr.base_hdr.dst_win, xmit_cls);
+
+	hdr_p->hfi_hdr.base_hdr.pkt_len = hfi_cachelines_to_pktlen(xmit_cls);
+
+	dst += HF_IP_HDR_LEN;
+	proto_hdr_p = (struct hf_if_proto_hdr *)dst;
+
+	proto_hdr_p->version = HF_PROTO_HDR_VERSION;
+	proto_hdr_p->msg_len = msg_len;
+	proto_hdr_p->msg_id = net_if->msg_id;
+	proto_hdr_p->msg_type = msg_type;
+	proto_hdr_p->msg_flag = msg_flag;
+
+	dst += HF_PROTO_LEN;
+
+	return dst;
+}
+
+static int hf_payload_tx(struct sk_buff *skb, struct hf_net *net, u32 is_bcast)
+{
+	struct hf_if		*net_if = &(net->hfif);
+	u32			msg_len, len;
+	u32			xmit_cls;
+	char			*dst;
+	int			rc = 0;
+
+	msg_len = skb->len - ETH_HLEN + HF_PROTO_LEN;
+	xmit_cls = hfi_bytes_to_cacheline(msg_len + HF_IP_HDR_LEN);
+
+	if (is_bcast) {
+		if (xmit_cls <= HF_BCAST_CACHE_LINE_2)
+			xmit_cls = HF_BCAST_CACHE_LINE_2;
+		else
+			xmit_cls = HF_BCAST_CACHE_LINE_16;
+	}
+
+	rc = hf_tx_check_avail(net, xmit_cls);
+	if (rc) {
+		netdev_err(net->netdev, "hf_payload_tx: hf_tx_check_avail find "
+				"no avail slot\n");
+		return rc;
+	}
+
+	dst = hf_build_payload_hdr(net, skb, msg_len, xmit_cls, is_bcast);
+	if (!dst)
+		return 0;
+
+	/* copy skb data, skipping hwhdr */
+	len = skb->len - ETH_HLEN;
+
+	rc = hf_copy_skb_to_fifo(net, skb, dst, len, ETH_HLEN);
+	if (rc)
+		return rc;
+
+	net_if->tx_fifo.tail =
+		(net_if->tx_fifo.tail + xmit_cls) & (net_if->tx_fifo.emax);
+	atomic_sub(xmit_cls, &(net_if->tx_fifo.avail));
+
+	net_if->sfifo_packets++;
+	net->netdev->stats.tx_packets++;
+	net->netdev->stats.tx_bytes += msg_len;
+
+	netdev_dbg(net->netdev, "hf_payload_tx: exit, tx_fifo tail = 0x%x, "
+		"avail = 0x%x, skb->len = 0x%x\n", net_if->tx_fifo.tail,
+		atomic_read(&(net_if->tx_fifo.avail)), skb->len);
+
+	dev_kfree_skb_any(skb);
+	return 0;
+
+}
+
+static int hf_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct hf_net	*net = netdev_priv(netdev);
+	struct hf_if	*net_if = &(net->hfif);
+	u32		len, is_bcast;
+	u32		send_cnt = 1;
+
+	is_bcast = !memcmp(((struct ethhdr *)(skb->data))->h_dest,
+				netdev->broadcast,
+				netdev->addr_len);
+
+	if (unlikely(skb->len <= 0)) {
+		netdev_err(netdev, "hf_start_xmit: invalid skb->len 0x%x\n",
+						skb->len);
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* total len to transfer */
+	len = skb->len - ETH_HLEN;
+
+	if (len <= HF_PAYLOAD_MAX) {
+		/* send ip with payload */
+		if (hf_payload_tx(skb, net, is_bcast) < 0) {
+			netdev_err(netdev, "hf_start_xmit: "
+				"hf_payload_tx fail 1\n");
+
+			return NETDEV_TX_BUSY;
+		}
+	} else {
+		netdev_err(netdev, "hf_start_xmit: skb->len 0x%x "
+			"greater than max 0x%x\n",
+			skb->len, (u32)HF_PAYLOAD_MAX);
+
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Make sure all fields are written before ringing hw doorbell */
+	wmb();
+
+	/* ring doorbell */
+	hf_mmio_regs_write(net_if, HFI_SFIFO_DB_REG, send_cnt);
+
+	if (atomic_read(&net_if->tx_fifo.avail) < HF_TX_LOW_WATERMARK)
+		hf_tx_check_avail(net, HF_TX_LOW_WATERMARK);
+
+	net_if->msg_id++;
+	netdev->trans_start = jiffies;
+
+	return NETDEV_TX_OK;
+}
+
+/*
+ * ndo_tx_timeout handler: only logs whether the queue is currently stopped;
+ * no recovery is attempted here.
+ */
+static void hf_tx_timeout(struct net_device *netdev)
+{
+	const int	stopped = netif_queue_stopped(netdev);
+
+	netdev_warn(netdev, "hf_tx_timeout: queue_stopped is %d\n", stopped);
+}
+
 static int hf_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	if ((new_mtu <= 68) || (new_mtu > HF_NET_MTU))
@@ -449,6 +883,8 @@ static const struct net_device_ops hf_netdev_ops = {
 	.ndo_open		= hf_net_open,
 	.ndo_stop		= hf_net_close,
 	.ndo_change_mtu		= hf_change_mtu,
+	.ndo_start_xmit		= hf_start_xmit,
+	.ndo_tx_timeout		= hf_tx_timeout,
 	.ndo_set_mac_address	= NULL,
 };
 
@@ -465,6 +901,8 @@ static void hf_if_setup(struct net_device *netdev)
 	netdev->header_ops	= &hf_header_ops;
 	netdev->netdev_ops	= &hf_netdev_ops;
 
+	netdev->watchdog_timeo	= HF_TX_TIMEOUT;
+
 	memcpy(netdev->broadcast, hfi_bcast_addr, ETH_ALEN);
 }
 
diff --git a/include/linux/hfi/hfi_ip.h b/include/linux/hfi/hfi_ip.h
index 6b6a74c..4e70c14 100644
--- a/include/linux/hfi/hfi_ip.h
+++ b/include/linux/hfi/hfi_ip.h
@@ -43,6 +43,7 @@
 #include <linux/hfi/hfidd_internal.h>
 #include <linux/hfi/hfidd_client.h>
 #include <linux/hfi/hfidd_requests.h>
+#include <linux/hfi/hfidd_regs.h>
 #include <linux/hfi/hfidd_pkt_formats.h>
 
 #define HF_DRV_VERSION			"1.0"
@@ -51,16 +52,32 @@
 
 #define MAX_HF_PER_HFI			2
 #define	HF_IP_JOBID			0xFFFFFFF0
+#define HF_TX_TIMEOUT			(500 * HZ)
+#define HF_NAPI_WEIGHT			256
 #define HF_MAX_NAME_LEN			64
 
+/* sfifo intr: bit 39-55 is threshold */
+/*             bit 34 enable, bit 35 unmask */
+#define HF_SFIFO_INTR_ENABLE		(0x3 << (63 - 35))
+#define HF_SFIFO_INTR_MASK		0x1FFFF		/* 17 bits */
+#define HF_SFIFO_INTR_CNT_SHIFT		(63 - 55)
+#define HF_SFIFO_INTR_EVENT		0x00000040 /* bit 57 */
+#define HF_SFIFO_INTR_WATERMARK		(HF_SFIFO_SLOTS - (HF_SFIFO_SLOTS >> 3))
+
 #define HF_SFIFO_SIZE			0x40000	/* 256K */
 #define HF_SFIFO_SLOTS			(HF_SFIFO_SIZE >> HFI_CACHE_LINE_SHIFT)
 #define HF_RFIFO_SIZE			0x1000000	/* 16M */
 #define HF_RFIFO_SLOTS			(HF_RFIFO_SIZE >> HFI_CACHE_LINE_SHIFT)
+#define HF_TX_LOW_WATERMARK		(HF_SFIFO_SLOTS >> 4)
 
 #define HF_FV_BIT_CNT			32
+#define HF_FV_BIT_MAX			31
+#define HF_SEND_ONE			1
 
-#define HF_NET_MTU			(2048 - HF_IP_HDR_LEN - HF_PROTO_LEN)
+#define HF_PAYLOAD_MAX			(2048 - HF_IP_HDR_LEN - HF_PROTO_LEN)
+#define HF_NET_MTU			HF_PAYLOAD_MAX
+#define HF_PAYLOAD_RX_THRESHOLD		0x10ULL
+#define HF_PAYLOAD_RX_THRESH_SHIFT	59
 
 struct hfi_ip_extended_hdr {            /* 16B */
 	unsigned int	immediate_len:7;/* In bytes */
@@ -83,6 +100,14 @@ struct hfi_ip_with_payload_pkt {
 #define HF_IP_HDR_LEN			((sizeof(struct hfi_hdr) + \
 				sizeof(struct hfi_ip_extended_hdr)))
 #define HF_ALIGN_PAD			2
+#define HF_PROTO_HDR_VERSION		0x1
+/* HFI protocol message type */
+#define	HF_IF_ARP			0xA0
+#define	HF_IF_FIFO			0xA1
+
+/* HFI protocol message flag */
+#define	HF_IF_UCAST			0xB0
+#define	HF_IF_BCAST			0xB1
 
 struct hf_if_proto_hdr {
 	u16			version;
@@ -93,6 +118,8 @@ struct hf_if_proto_hdr {
 };
 
 #define HF_PROTO_LEN		sizeof(struct hf_if_proto_hdr)
+#define HF_BCAST_CACHE_LINE_16	16
+#define HF_BCAST_CACHE_LINE_2	2
 
 struct hf_fifo {
 	void			*addr;
@@ -119,6 +146,7 @@ struct hf_if {
 	u32			sfifo_fv_polarity;
 	u32			sfifo_slots_per_blk;
 	u32			sfifo_packets;
+	u32			msg_id;
 	void __iomem		*doorbell;		/* mapped mmio_regs */
 	struct hf_fifo		tx_fifo;
 	struct hf_fifo		rx_fifo;
@@ -144,5 +172,47 @@ struct hf_global_info {
 
 extern struct hf_global_info	hf_ginfo;
 
+#define HF_EVENT_NUM		1
+
+struct hf_events_cb {
+	enum hfi_event_type	type;	/* event the callback belongs to */
+	void			*func;	/* callback function, kept as void * */
+};
+
 #define HF_MAC_HFI_SHIFT	12
+#define HF_HDR_HFI_SHIFT	8
+
+/*
+ * Convert the 16-bit id taken from a MAC address: the bits from
+ * HF_MAC_HFI_SHIFT upwards are moved to HF_HDR_HFI_SHIFT and the low byte
+ * is kept unchanged.
+ */
+static inline u32 hf_get_win(u16 id)
+{
+	u32	hfi = id >> HF_MAC_HFI_SHIFT;
+	u32	low = id & 0xFF;
+
+	return (hfi << HF_HDR_HFI_SHIFT) | low;
+}
+
+/*
+ * Decode the destination from the hw header: bytes 2-3 of h_dest carry the
+ * ISR (low 12 bits used), bytes 4-5 are converted with hf_get_win().
+ * Both 16-bit values are read in host byte order.
+ */
+static inline void hf_get_dst_info(struct ethhdr *hwhdr_p,
+				   u16 *d_isr,
+				   u16 *d_win)
+{
+	u16	isr_word = *(u16 *)(&(hwhdr_p->h_dest[2]));
+	u16	win_word = *(u16 *)(&(hwhdr_p->h_dest[4]));
+
+	*d_isr = isr_word & 0xFFF;
+	*d_win = hf_get_win(win_word);
+}
+
+/*
+ * Write data to the register at offset off from the mapped doorbell area
+ * and read the same register back.  An isync() separates the write from
+ * the read, and a full mb() follows the read.
+ */
+static inline void hf_mmio_regs_write_then_read(struct hf_if *net_if,
+				int off,
+				u64 data)
+{
+	void __iomem	*reg = net_if->doorbell + off;
+
+	__raw_writeq(data, reg);
+	isync();
+	__raw_readq(reg);
+	/* Make sure all received pkt shows up in rfifo */
+	mb();
+}
+
+/* Raw 64-bit read of the register at offset off in the doorbell mapping. */
+static inline u64 hf_mmio_regs_read(struct hf_if *net_if, int off)
+{
+	void __iomem	*reg = net_if->doorbell + off;
+
+	return __raw_readq(reg);
+}
+
+/* Raw 64-bit write of data to the register at offset off; no barrier. */
+static inline void hf_mmio_regs_write(struct hf_if *net_if, int off, u64 data)
+{
+	void __iomem	*reg = net_if->doorbell + off;
+
+	__raw_writeq(data, reg);
+}
 #endif
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 27/27] HFI: hfi_ip ethtool support
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/ip/Makefile      |    2 +-
 drivers/net/hfi/ip/hf_ethtool.c  |  136 ++++++++++++++++++++++++++++++++++++++
 drivers/net/hfi/ip/hf_proto.h    |    1 +
 drivers/net/hfi/ip/hfi_ip_main.c |   36 +++++++++-
 include/linux/hfi/hfi_ip.h       |   32 +++++++++-
 5 files changed, 201 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/hfi/ip/hf_ethtool.c

diff --git a/drivers/net/hfi/ip/Makefile b/drivers/net/hfi/ip/Makefile
index 90c7dea..28a4a51 100644
--- a/drivers/net/hfi/ip/Makefile
+++ b/drivers/net/hfi/ip/Makefile
@@ -3,4 +3,4 @@
 #
 obj-$(CONFIG_HFI_IP) += hfi_ip.o
 
-hfi_ip-objs :=	hfi_ip_main.o
+hfi_ip-objs :=	hfi_ip_main.o hf_ethtool.o
diff --git a/drivers/net/hfi/ip/hf_ethtool.c b/drivers/net/hfi/ip/hf_ethtool.c
new file mode 100644
index 0000000..204a1bf
--- /dev/null
+++ b/drivers/net/hfi/ip/hf_ethtool.c
@@ -0,0 +1,136 @@
+/*
+ * hf_ethtool.c
+ *
+ * HF IP driver for IBM System p
+ *
+ *  Authors:
+ *	Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *	William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *	Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *	Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *	Jian Xiao <jian@linux.vnet.ibm.com>
+ *	Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *	Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/ethtool.h>
+
+#include <linux/hfi/hfi_ip.h>
+
+/*
+ * Names reported for ETH_SS_STATS.
+ *
+ * hf_get_ethtool_stats() copies struct hf_ethtool_stats to user space as a
+ * plain array of u64, so this table must have exactly one entry per member
+ * of that structure, in the same order.  Add a name here whenever a counter
+ * is added to the structure.
+ */
+static const char hf_ethtool_stats_keys[][ETH_GSTRING_LEN] = {
+	{"sfifo_packets"},
+	{"tx_timeout"},
+	{"tx_queue_stop"},
+	{"tx_drop"},
+	{"tx_err_headlen"},
+	{"rx_version_mismatch"},
+	{"rx_err_skb"},
+	{"rx_err_hdr_type"},
+	{"rx_err_msg_type"},
+	{"rx_err_status"},
+	{"rx_err_bcast_csum"},
+	{"rx_fslot_debt"},
+	{"mmio_rx_inc_avail"},
+	{"payload_sent"},
+	{"payload_recv"},
+	{"sfifo_send_intr_armed"},
+	{"recv_intr_armed"},
+	{"recv_intr_offset"},
+	{"recv_imm_intr_armed"},
+	{"recv_imm_intr_offset"},
+	{"send_intr_fired"},
+	{"recv_intr_fired"},
+	{"in_poll"},
+	{"max_poll_recv"},
+};
+/* ethtool get_drvinfo: fills in only the driver name and version strings. */
+static void hf_get_drvinfo(struct net_device *netdev,
+		struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, HF_DRV_NAME, sizeof(info->driver));
+	strlcpy(info->version, HF_DRV_VERSION, sizeof(info->version));
+}
+
+/*
+ * ethtool get_strings: copies the whole statistics name table for
+ * ETH_SS_STATS; any other string set leaves data untouched.
+ */
+static void hf_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	memcpy(data, &hf_ethtool_stats_keys, sizeof(hf_ethtool_stats_keys));
+}
+
+/*
+ * ethtool get_sset_count: number of statistics names for ETH_SS_STATS,
+ * -EINVAL for every other string set.
+ */
+static int hf_get_sset_count(struct net_device *netdev, int sset)
+{
+	if (sset == ETH_SS_STATS)
+		return ARRAY_SIZE(hf_ethtool_stats_keys);
+
+	return -EINVAL;
+}
+
+/*
+ * ethtool get_ethtool_stats: copies the interface's struct hf_ethtool_stats
+ * into data byte for byte.  The stats argument is not used.
+ */
+static void hf_get_ethtool_stats(struct net_device *netdev,
+		struct ethtool_stats *stats, u64 *data)
+{
+	struct hf_net			*net = netdev_priv(netdev);
+	const struct hf_ethtool_stats	*counters = &(net->hfif.eth_stats);
+
+	memcpy(data, counters, sizeof(struct hf_ethtool_stats));
+}
+/* ethtool operations implemented by this driver: driver info and stats. */
+static const struct ethtool_ops hf_ethtool_ops = {
+	.get_drvinfo		= hf_get_drvinfo,
+	.get_strings		= hf_get_strings,
+	.get_sset_count		= hf_get_sset_count,
+	.get_ethtool_stats	= hf_get_ethtool_stats,
+};
+/* Install hf_ethtool_ops as the ethtool operations of netdev. */
+void hf_set_ethtool_ops(struct net_device *netdev)
+{
+	SET_ETHTOOL_OPS(netdev, &hf_ethtool_ops);
+}
diff --git a/drivers/net/hfi/ip/hf_proto.h b/drivers/net/hfi/ip/hf_proto.h
index 022512a..3b2b23b 100644
--- a/drivers/net/hfi/ip/hf_proto.h
+++ b/drivers/net/hfi/ip/hf_proto.h
@@ -36,6 +36,7 @@
 int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls);
 void hf_construct_hwhdr(struct hf_if *net_if, struct sk_buff *skb,
 			struct base_hdr *b_hdr);
+void hf_set_ethtool_ops(struct net_device *netdev);
 extern int hfidd_open_window_func(struct hfidd_acs *p_acs,
 		u32 is_userspace,
 		struct hfi_client_info *user_p,
diff --git a/drivers/net/hfi/ip/hfi_ip_main.c b/drivers/net/hfi/ip/hfi_ip_main.c
index 6b2ec3f..4b897d3 100644
--- a/drivers/net/hfi/ip/hfi_ip_main.c
+++ b/drivers/net/hfi/ip/hfi_ip_main.c
@@ -208,6 +208,7 @@ static int hf_send_intr_callback(void *parm, u32 win, u32 ext)
 	mb();
 
 	netif_wake_queue(net->netdev);
+	net->hfif.eth_stats.send_intr_fired++;
 
 	return 0;
 }
@@ -218,6 +219,7 @@ static int hf_recv_intr_callback(void *parm, u32 win, u32 ext)
 
 	napi_schedule(&(net->napi));
 
+	net->hfif.eth_stats.recv_intr_fired++;
 	return 0;
 }
 
@@ -381,6 +383,9 @@ static void hf_set_recv_intr(struct hf_if *net_if)
 	hf_mmio_regs_write_then_read(net_if, HFI_RFIFO_INTR_REG,
 		(HF_ENA_RECV_INTR + (offset << HF_RECV_INTR_MATCH_SHIFT)));
 
+	net_if->eth_stats.recv_intr_offset = offset;
+	net_if->eth_stats.recv_intr_armed++;
+
 	/* check if there is packet received in the mean time */
 	rx_pkt = net_if->rx_fifo.addr + (offset << HFI_CACHE_LINE_SHIFT);
 
@@ -390,6 +395,9 @@ static void hf_set_recv_intr(struct hf_if *net_if)
 		/* force an immediate recv intr */
 		hf_mmio_regs_write(net_if, HFI_RFIFO_INTR_REG,
 		(HF_IMM_RECV_INTR + (offset << HF_RECV_INTR_MATCH_SHIFT)));
+
+		net_if->eth_stats.recv_imm_intr_offset = offset;
+		net_if->eth_stats.recv_imm_intr_armed++;
 	}
 }
 
@@ -507,7 +515,7 @@ static int hf_net_open(struct net_device *netdev)
 	struct hfidd_acs	*p_acs = HF_ACS(net_if);
 
 	memset(&(netdev->stats), 0, sizeof(struct net_device_stats));
-	net_if->sfifo_packets = 0;
+	memset(&(net_if->eth_stats), 0, sizeof(struct hf_ethtool_stats));
 
 	spin_lock(&(net_if->lock));
 	net_if->state = HF_NET_HALF_OPEN;
@@ -614,6 +622,7 @@ static inline int hf_check_hdr_version(struct hf_net *net,
 			"hf_check_hdr_version: hdr version 0x%x "
 			"does not match 0x%x\n",
 			hf_hdr->version, HF_PROTO_HDR_VERSION);
+		net->hfif.eth_stats.rx_version_mismatch++;
 		net->netdev->stats.rx_dropped++;
 		return -EINVAL;
 	}
@@ -710,6 +719,7 @@ static void hf_recv_ip_with_payload(struct hf_net *net,
 	netdev->stats.rx_packets++;
 	netdev->stats.rx_bytes += skb->len;
 
+	net_if->eth_stats.payload_recv++;
 	netif_receive_skb(skb);
 }
 
@@ -717,6 +727,8 @@ static void hf_recv_ip_good(struct hf_net *net,
 			    struct hfi_hdr *rx_curr,
 			    u32 pkt_len)
 {
+	struct hf_if	*net_if = &(net->hfif);
+
 	switch (rx_curr->type.header_type) {
 
 	case  HFI_IP_WITH_PAYLOAD:
@@ -731,6 +743,7 @@ static void hf_recv_ip_good(struct hf_net *net,
 			rx_curr->type.header_type, pkt_len);
 
 		/* unknown packet, drop it */
+		net_if->eth_stats.rx_err_hdr_type++;
 		net->netdev->stats.rx_dropped++;
 		break;
 	}
@@ -767,6 +780,7 @@ static int hf_rx(struct hf_net *net, int budget)
 				"status = 0x%x, pkt_len = 0x%x\n",
 				status, pkt_len);
 
+			net_if->eth_stats.rx_err_status++;
 			net->netdev->stats.rx_dropped++;
 		}
 
@@ -782,6 +796,7 @@ static int hf_rx(struct hf_net *net, int budget)
 			hf_mmio_regs_write(net_if, HFI_RFIFO_INC_FSLOT_REG,
 					net_if->rx_fslot_debt);
 			net_if->rx_fslot_debt = 0;
+			net_if->eth_stats.mmio_rx_inc_avail++;
 		}
 
 		budget--;
@@ -791,6 +806,7 @@ static int hf_rx(struct hf_net *net, int budget)
 
 	}
 
+	net_if->eth_stats.rx_fslot_debt = net_if->rx_fslot_debt;
 	netdev_dbg(net->netdev, "hf_rx: exit, head = 0x%x, recv 0x%x pkts\n",
 			net_if->rx_fifo.head, num);
 
@@ -860,9 +876,10 @@ int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls)
 			u64		intr_thresh;
 
 			netif_stop_queue(netdev);
+			net_if->eth_stats.tx_queue_stop++;
 
 			/* turn on transmit interrupt */
-			intr_thresh = (net_if->sfifo_packets -
+			intr_thresh = (net_if->eth_stats.sfifo_packets -
 			HF_SFIFO_INTR_WATERMARK) & HF_SFIFO_INTR_MASK;
 
 			intr_cntl = HF_SFIFO_INTR_ENABLE |
@@ -871,6 +888,7 @@ int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls)
 			hf_mmio_regs_write_then_read(net_if,
 					HFI_SFIFO_INTR_CNTL, intr_cntl);
 
+			net_if->eth_stats.sfifo_send_intr_armed++;
 			return -EBUSY;
 		}
 	}
@@ -957,6 +975,7 @@ static char *hf_build_payload_hdr(struct hf_net *net,
 			" not supported\n", hwhdr_p->h_proto);
 
 		dev_kfree_skb_any(skb);
+		net_if->eth_stats.tx_drop++;
 		return NULL;
 	}
 
@@ -1051,7 +1070,8 @@ static int hf_payload_tx(struct sk_buff *skb, struct hf_net *net, u32 is_bcast)
 		(net_if->tx_fifo.tail + xmit_cls) & (net_if->tx_fifo.emax);
 	atomic_sub(xmit_cls, &(net_if->tx_fifo.avail));
 
-	net_if->sfifo_packets++;
+	net_if->eth_stats.sfifo_packets++;
+	net_if->eth_stats.payload_sent++;
 	net->netdev->stats.tx_packets++;
 	net->netdev->stats.tx_bytes += msg_len;
 
@@ -1079,6 +1099,7 @@ static int hf_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 		netdev_err(netdev, "hf_start_xmit: invalid skb->len 0x%x\n",
 						skb->len);
 		dev_kfree_skb_any(skb);
+		net_if->eth_stats.tx_drop++;
 		return NETDEV_TX_OK;
 	}
 
@@ -1119,8 +1140,12 @@ static int hf_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 static void hf_tx_timeout(struct net_device *netdev)
 {
+	struct hf_net	*net = netdev_priv(netdev);
+	struct hf_if	*net_if = &(net->hfif);
+
 	netdev_warn(netdev, "hf_tx_timeout: queue_stopped is %d\n",
 			netif_queue_stopped(netdev));
+	net_if->eth_stats.tx_timeout++;
 }
 
 static int hf_change_mtu(struct net_device *netdev, int new_mtu)
@@ -1207,6 +1232,7 @@ static int hf_poll(struct napi_struct *napi, int budget)
 	net_if	= &(net->hfif);
 	netdev	= net->netdev;
 
+	net_if->eth_stats.in_poll++;
 	work_done = hf_rx(net, budget);
 
 	/* Always assume we have received all available packets */
@@ -1215,7 +1241,8 @@ static int hf_poll(struct napi_struct *napi, int budget)
 		napi_complete(napi);
 		isync();
 		hf_set_recv_intr(net_if);
-	}
+	} else
+		net_if->eth_stats.max_poll_recv++;
 
 	return work_done;
 }
@@ -1248,6 +1275,7 @@ static struct hf_net *hf_init_netdev(int idx, int ai)
 	net->hfif.state = HF_NET_CLOSE;
 
 	spin_lock_init(&net->hfif.lock);
+	hf_set_ethtool_ops(netdev);
 
 	rc = register_netdev(netdev);
 	if (rc) {
diff --git a/include/linux/hfi/hfi_ip.h b/include/linux/hfi/hfi_ip.h
index ec87300..d4317ee 100644
--- a/include/linux/hfi/hfi_ip.h
+++ b/include/linux/hfi/hfi_ip.h
@@ -42,6 +42,7 @@
 #include <net/arp.h>
 
 #include <linux/hfi/hfidd_internal.h>
+#include <linux/hfi/hfidd_adpt.h>
 #include <linux/hfi/hfidd_client.h>
 #include <linux/hfi/hfidd_requests.h>
 #include <linux/hfi/hfidd_regs.h>
@@ -150,6 +151,35 @@ struct hf_fifo {
 #define	HF_NET_HALF_OPEN	0xA0
 #define	HF_NET_OPEN		0xA1
 
+struct hf_ethtool_stats {	/* copied to ethtool as a plain u64 array */
+	u64		sfifo_packets;	/* total packets sent through sfifo */
+	u64		tx_timeout;	/* calls to hf_tx_timeout() */
+	u64		tx_queue_stop;	/* times the tx queue was stopped */
+	u64		tx_drop;
+	u64		tx_err_headlen;
+	u64		rx_version_mismatch;
+	u64		rx_err_skb;
+	u64		rx_err_hdr_type;
+	u64		rx_err_msg_type;
+	u64		rx_err_status;
+	u64		rx_err_bcast_csum;
+	u64		rx_fslot_debt;	/* last rx_fslot_debt seen by hf_rx() */
+	u64		mmio_rx_inc_avail;
+	u64		payload_sent;	/* packets from IP sent with payload
+					   mode */
+	u64		payload_recv;	/* packets delivered to IP with payload
+					   mode */
+	u64		sfifo_send_intr_armed;
+	u64		recv_intr_armed;
+	u64		recv_intr_offset;	/* last offset armed */
+	u64		recv_imm_intr_armed;
+	u64		recv_imm_intr_offset;	/* last offset armed */
+	u64		send_intr_fired;
+	u64		recv_intr_fired;
+	u64		in_poll;	/* calls to hf_poll() */
+	u64		max_poll_recv;
+};
+
 struct hf_if {
 	u32			idx;			/* 0, 1, 2, 3 ...   */
 	u32			ai;			/* 0=hfi0, 1=hfi1   */
@@ -160,7 +190,6 @@ struct hf_if {
 	spinlock_t		lock;			/* lock for state */
 	u32			sfifo_fv_polarity;
 	u32			sfifo_slots_per_blk;
-	u32			sfifo_packets;
 	u32			rx_pkt_valid;		/* Polarity of recv
 							   packet valid bit */
 	u32			msg_id;
@@ -172,6 +201,7 @@ struct hf_if {
 	struct sk_buff		**tx_skb;		/* array to store tx
 							   2k skb */
 	void			*sfifo_finishvec;
+	struct hf_ethtool_stats eth_stats;
 };
 
 /* Private structure for HF interface */
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 26/27] HFI: hfi_ip fifo receive path
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/ip/hf_proto.h    |    2 +
 drivers/net/hfi/ip/hfi_ip_main.c |  326 +++++++++++++++++++++++++++++++++++++-
 include/linux/hfi/hfi_ip.h       |   26 +++-
 3 files changed, 351 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hfi/ip/hf_proto.h b/drivers/net/hfi/ip/hf_proto.h
index b0232ab..022512a 100644
--- a/drivers/net/hfi/ip/hf_proto.h
+++ b/drivers/net/hfi/ip/hf_proto.h
@@ -34,6 +34,8 @@
 #define _HF_PROTO_H_
 
 int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls);
+void hf_construct_hwhdr(struct hf_if *net_if, struct sk_buff *skb,
+			struct base_hdr *b_hdr);
 extern int hfidd_open_window_func(struct hfidd_acs *p_acs,
 		u32 is_userspace,
 		struct hfi_client_info *user_p,
diff --git a/drivers/net/hfi/ip/hfi_ip_main.c b/drivers/net/hfi/ip/hfi_ip_main.c
index 689f92e..6b2ec3f 100644
--- a/drivers/net/hfi/ip/hfi_ip_main.c
+++ b/drivers/net/hfi/ip/hfi_ip_main.c
@@ -154,6 +154,9 @@ static int hf_alloc_rx_resource(struct hf_net *net)
 
 	memset(net_if->rx_fifo.addr, 0, net_if->rx_fifo.size);
 
+	net_if->rx_fslot_debt = 0;
+	net_if->rx_pkt_valid = 1;
+
 	return 0;
 }
 
@@ -209,8 +212,18 @@ static int hf_send_intr_callback(void *parm, u32 win, u32 ext)
 	return 0;
 }
 
+/*
+ * Receive event callback: schedules NAPI polling for the interface passed
+ * in parm.  win and ext are not used.  Always returns 0.
+ */
+static int hf_recv_intr_callback(void *parm, u32 win, u32 ext)
+{
+	struct hf_net	*net = parm;
+
+	napi_schedule(&net->napi);
+	return 0;
+}
+
 struct hf_events_cb hf_events[HF_EVENT_NUM] = {
 	{HFIDD_SEND,		(void *)hf_send_intr_callback},
+	{HFIDD_RECV,		(void *)hf_recv_intr_callback},
 };
 
 static int hf_register_ip_events(struct hf_net *net,
@@ -357,14 +370,50 @@ static int hf_set_mac_addr(struct net_device *netdev, void *p)
 	return 0;
 }
 
+/*
+ * Enable the receive interrupt with its match value set to the current
+ * rx_fifo head, i.e. the slot of the next expected packet.
+ *
+ * After arming, the header at that slot is examined: if it already carries
+ * HF_IP_JOBID and the current rx_pkt_valid polarity, a packet arrived in
+ * the meantime and an immediate receive interrupt is requested instead.
+ */
+static void hf_set_recv_intr(struct hf_if *net_if)
+{
+	int			offset = net_if->rx_fifo.head;
+	struct hfi_hdr		*rx_pkt;
+
+	/* enable recv intr and set threshold to next packet */
+	hf_mmio_regs_write_then_read(net_if, HFI_RFIFO_INTR_REG,
+		(HF_ENA_RECV_INTR + (offset << HF_RECV_INTR_MATCH_SHIFT)));
+
+	/* check if there is packet received in the mean time */
+	rx_pkt = net_if->rx_fifo.addr + (offset << HFI_CACHE_LINE_SHIFT);
+
+	if (rx_pkt->id.job_id != HF_IP_JOBID)
+		return;
+
+	if (rx_pkt->base_hdr.pkt_valid != net_if->rx_pkt_valid)
+		return;
+
+	/* force an immediate recv intr */
+	hf_mmio_regs_write(net_if, HFI_RFIFO_INTR_REG,
+		(HF_IMM_RECV_INTR + (offset << HF_RECV_INTR_MATCH_SHIFT)));
+}
+
 static void hf_init_hw_regs(struct hf_if *net_if)
 {
 	/* setup IP with payload threshold in cache line size */
 	hf_mmio_regs_write(net_if, HFI_IP_RECV_SIZE,
 		(HF_PAYLOAD_RX_THRESHOLD << HF_PAYLOAD_RX_THRESH_SHIFT));
 
+	/* setup recv fifo out of order intr control to disable */
+	hf_mmio_regs_write(net_if, HFI_RFIFO_OUT_EVENT_REG,
+			HF_RFIFO_OUT_CNTL_REARM);
+
+	/* setup recv fifo out of order threshold */
+	hf_mmio_regs_write(net_if, HFI_RFIFO_OUT_TH_REG, HF_RFIFO_OUT_THRESH);
+
 	/* initialize SEND INTR STATUS */
 	hf_mmio_regs_write(net_if, HFI_SINTR_STATUS_REG, 0);
+
+	hf_mmio_regs_write(net_if, HFI_RFIFO_INJ_TH_REG,
+			(HF_RFIFO_CACHE_INJ_TH << HF_RFIFO_CACHE_INJ_TH_SHIFT));
+
+	/* enable and set receive intr */
+	hf_set_recv_intr(net_if);
 }
 
 static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
@@ -402,6 +451,7 @@ static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
 	net_if->state = HF_NET_OPEN;
 	spin_unlock(&(net_if->lock));
 
+	napi_enable(&net->napi);
 	netif_carrier_on(netdev);
 	netif_start_queue(netdev);
 
@@ -488,6 +538,7 @@ static int hf_net_close(struct net_device *netdev)
 
 	spin_lock(&(net_if->lock));
 	if (net_if->state == HF_NET_OPEN) {
+		napi_disable(&net->napi);
 		netif_stop_queue(netdev);
 		netif_carrier_off(netdev);
 
@@ -507,6 +558,245 @@ static int hf_net_close(struct net_device *netdev)
 	return 0;
 }
 
+/* Invalidate the jobid field of each cache line before advancing head.
+ * The first cache line is protected by the valid bit, so we skip it. */
+static inline void hf_advance_rx_head(struct hf_if *net_if, u32 len)
+{
+	int		i, h;
+	u32		*cache_p;
+
+	h = (net_if->rx_fifo.head + 1) & (net_if->rx_fifo.emax);
+
+	for (i = 1; i < len; i++) {
+		cache_p = (u32 *)((char *)(net_if->rx_fifo.addr) +
+				(h << HFI_CACHE_LINE_SHIFT));
+		if (*cache_p == HF_IP_JOBID)
+			*cache_p = 0;
+		h = (h + 1) & (net_if->rx_fifo.emax);
+	}
+
+	if (net_if->rx_fifo.head > h)
+		net_if->rx_pkt_valid ^= 0x1;
+
+	net_if->rx_fifo.head = h;
+}
+
+void hf_construct_hwhdr(struct hf_if *net_if,
+			struct sk_buff *skb,
+			struct base_hdr *b_hdr)
+{
+	struct ethhdr		*hwhdr_p;
+
+	hwhdr_p = (struct ethhdr *)(skb->data);
+
+	/* MAC byte 1, bits6 = 1, locally admin MAC */
+	hwhdr_p->h_dest[0] = 0x2;
+	/* MAC byte 2, bits2-7 = cluster id */
+	hwhdr_p->h_dest[1] = 0x0;
+	*(u16 *)(&(hwhdr_p->h_dest[2])) = (u16)(b_hdr->dst_isr);
+	*(u16 *)(&(hwhdr_p->h_dest[4])) =
+			(u16)hf_get_mac(b_hdr->dst_win);
+
+	hwhdr_p->h_source[0] = 0x2;
+	hwhdr_p->h_source[1] = 0x0;
+	*(u16 *)(&(hwhdr_p->h_source[2])) = (u16)(b_hdr->src_isr);
+	*(u16 *)(&(hwhdr_p->h_source[4])) =
+				(u16)hf_get_mac(b_hdr->src_win);
+
+	hwhdr_p->h_proto = skb->protocol;
+}
+
+static inline int hf_check_hdr_version(struct hf_net *net,
+				struct hf_if_proto_hdr *hf_hdr)
+{
+	if (hf_hdr->version != HF_PROTO_HDR_VERSION) {
+		netdev_err(net->netdev,
+			"hf_check_hdr_version: hdr version 0x%x "
+			"does not match 0x%x\n",
+			hf_hdr->version, HF_PROTO_HDR_VERSION);
+		net->netdev->stats.rx_dropped++;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Copy one IP-with-payload packet out of the receive FIFO into a freshly
+ * allocated skb and pass it to the stack via netif_receive_skb().
+ * pkt points at the packet in the rx FIFO; pkt_len is its length in cache
+ * lines. Packets with a bad protocol header version or unknown msg_type,
+ * or for which no skb can be allocated, are counted in rx_dropped. */
+static void hf_recv_ip_with_payload(struct hf_net *net,
+				    struct hfi_ip_with_payload_pkt *pkt,
+				    u32 pkt_len)
+{
+	u32			len, resid;
+	struct hf_if		*net_if = &(net->hfif);
+	struct net_device	*netdev = net->netdev;
+	struct hf_if_proto_hdr	*hf_hdr;
+	struct sk_buff		*skb;
+	void			*src, *dst;
+	u32			cache_ln_num = 0;
+	u16			proto;
+
+	/* retrieve the protocol header pointer */
+	hf_hdr = (struct hf_if_proto_hdr *)(pkt->payload);
+
+	if (hf_check_hdr_version(net, hf_hdr) != 0)
+		return;
+
+	switch (hf_hdr->msg_type) {
+	case HF_IF_ARP:
+		proto = htons(ETH_P_ARP);
+		break;
+	case HF_IF_FIFO:
+		proto = htons(ETH_P_IP);
+		break;
+	default:
+		netdev_err(net->netdev,
+			"hf_recv_ip_with_payload: unknown msg_type 0x%x\n",
+			hf_hdr->msg_type);
+		netdev->stats.rx_dropped++;
+		return;
+	}
+
+	len = hf_hdr->msg_len - HF_PROTO_LEN;
+
+	skb = netdev_alloc_skb_ip_align(net->netdev,
+				len + ETH_HLEN + HF_ALIGN_PAD);
+	if (!skb) {
+		/* out of memory is recoverable: drop this packet, don't BUG */
+		netdev_err(net->netdev, "hf_recv_ip_with_payload: "
+				"netdev_alloc_skb_ip_align fail\n");
+		netdev->stats.rx_dropped++;
+		return;
+	}
+
+	skb_reserve(skb, HF_ALIGN_PAD);
+	skb->protocol = proto;
+	skb_put(skb, len + ETH_HLEN);
+
+	/* construct ethhdr from base hdr */
+	hf_construct_hwhdr(net_if, skb, &(pkt->hfi_hdr.base_hdr));
+	skb_reset_mac_header(skb);
+	skb_pull(skb, ETH_HLEN);
+
+	src = (void *)(hf_hdr + 1);
+	dst = (void *)skb->data;
+
+	/* check if the payload wrapped the rx_fifo */
+	if ((net_if->rx_fifo.head + (pkt_len - 1)) > net_if->rx_fifo.emax) {
+		/* Wrapped */
+		cache_ln_num = net_if->rx_fifo.emax - net_if->rx_fifo.head + 1;
+		resid  = cache_ln_num << HFI_CACHE_LINE_SHIFT;
+		resid -= (HF_IP_HDR_LEN + HF_PROTO_LEN);
+
+		/* For netboot, pkt_len may be larger than len */
+		if (resid > len)
+			resid = len;
+
+		memcpy(dst, src, resid);
+
+		src = (void *)net_if->rx_fifo.addr;
+		dst = (void *)skb->data + resid;
+		len -= resid;
+	}
+
+	/* copy the rest of payload */
+	if (len > 0)
+		memcpy(dst, src, len);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	netdev->stats.rx_packets++;
+	netdev->stats.rx_bytes += skb->len;
+
+	netif_receive_skb(skb);
+}
+
+static void hf_recv_ip_good(struct hf_net *net,
+			    struct hfi_hdr *rx_curr,
+			    u32 pkt_len)
+{
+	switch (rx_curr->type.header_type) {
+
+	case  HFI_IP_WITH_PAYLOAD:
+	case  HFI_IP_MULTICAST_WITH_PAYLOAD:
+		hf_recv_ip_with_payload(net,
+			(struct hfi_ip_with_payload_pkt *)rx_curr, pkt_len);
+		break;
+
+	default:
+		netdev_err(net->netdev, "hf_rx: receive unknown "
+			"headerType = 0x%x, pkt_len = 0x%x\n",
+			rx_curr->type.header_type, pkt_len);
+
+		/* unknown packet, drop it */
+		net->netdev->stats.rx_dropped++;
+		break;
+	}
+}
+
+static int hf_rx(struct hf_net *net, int budget)
+{
+	int		num = 0;
+	struct hf_if	*net_if = &(net->hfif);
+	u32		pkt_len, status;
+	struct hfi_hdr	*rx_curr;
+	u32		job_id, pkt_valid;
+
+	rx_curr = (struct hfi_hdr *) (net_if->rx_fifo.addr +
+			(net_if->rx_fifo.head << HFI_CACHE_LINE_SHIFT));
+
+	while (budget != 0) {
+		job_id = rx_curr->id.job_id;
+		pkt_valid = rx_curr->base_hdr.pkt_valid;
+
+		isync();
+		if ((job_id != HF_IP_JOBID) ||
+		    (pkt_valid != net_if->rx_pkt_valid))
+			break;
+
+		pkt_len = hfi_pktlen_to_cachelines(rx_curr->base_hdr.pkt_len);
+
+		status = rx_curr->base_hdr.status;
+		if (status == HFI_PKT_STATUS_GOOD) {
+			hf_recv_ip_good(net, rx_curr, pkt_len);
+		} else {
+			/* bad packet */
+			netdev_err(net->netdev, "hf_rx: receive bad "
+				"status = 0x%x, pkt_len = 0x%x\n",
+				status, pkt_len);
+
+			net->netdev->stats.rx_dropped++;
+		}
+
+		net->netdev->last_rx = jiffies;
+
+		hf_advance_rx_head(net_if, pkt_len);
+
+		/* Make sure the jobid is invalidated before posting to hw */
+		wmb();
+
+		net_if->rx_fslot_debt += pkt_len;
+		if (net_if->rx_fslot_debt >= HF_INC_FSLOT_WATERMARK) {
+			hf_mmio_regs_write(net_if, HFI_RFIFO_INC_FSLOT_REG,
+					net_if->rx_fslot_debt);
+			net_if->rx_fslot_debt = 0;
+		}
+
+		budget--;
+		num++;
+		rx_curr = net_if->rx_fifo.addr +
+			(net_if->rx_fifo.head << HFI_CACHE_LINE_SHIFT);
+
+	}
+
+	netdev_dbg(net->netdev, "hf_rx: exit, head = 0x%x, recv 0x%x pkts\n",
+			net_if->rx_fifo.head, num);
+
+	return num;
+}
+
 static void hf_tx_recycle(struct hf_if *net_if)
 {
 	u32		head, head_idx, slots_per_blk;
@@ -906,6 +1196,30 @@ static void hf_if_setup(struct net_device *netdev)
 	memcpy(netdev->broadcast, hfi_bcast_addr, ETH_ALEN);
 }
 
+static int hf_poll(struct napi_struct *napi, int budget)
+{
+	int			work_done;
+	struct net_device	*netdev;
+	struct hf_net		*net;
+	struct hf_if		*net_if;
+
+	net	= container_of(napi, struct hf_net, napi);
+	net_if	= &(net->hfif);
+	netdev	= net->netdev;
+
+	work_done = hf_rx(net, budget);
+
+	/* Budget not used up means the rx fifo is drained: leave polling */
+	/*  mode and re-arm the recv intr for the next packet */
+	if (work_done < budget) {
+		napi_complete(napi);
+		isync();
+		hf_set_recv_intr(net_if);
+	}
+
+	return work_done;
+}
+
 static struct hf_net *hf_init_netdev(int idx, int ai)
 {
 	struct net_device	*netdev;
@@ -924,6 +1238,7 @@ static struct hf_net *hf_init_netdev(int idx, int ai)
 	}
 
 	net = netdev_priv(netdev);
+	netif_napi_add(netdev, &(net->napi), hf_poll, HF_NAPI_WEIGHT);
 	net->netdev = netdev;
 
 	memset(&(net->hfif), 0, sizeof(struct hf_if));
@@ -939,11 +1254,16 @@ static struct hf_net *hf_init_netdev(int idx, int ai)
 		netdev_err(netdev, "hf_init_netdev: "
 				"failed to register netdev=hfi%d:hf%d, "
 				"rc = 0x%x\n", ai, idx, rc);
-		free_netdev(netdev);
-		return ERR_PTR(-ENODEV);
+		goto err_out1;
 	}
 
 	return net;
+
+err_out1:
+	netif_napi_del(&(net->napi));
+	free_netdev(netdev);
+
+	return ERR_PTR(-ENODEV);
 }
 
 static void hf_del_netdev(struct hf_net *net)
@@ -952,6 +1272,8 @@ static void hf_del_netdev(struct hf_net *net)
 
 	unregister_netdev(netdev);
 
+	netif_napi_del(&(net->napi));
+
 	free_netdev(netdev);
 }
 
diff --git a/include/linux/hfi/hfi_ip.h b/include/linux/hfi/hfi_ip.h
index 4e70c14..ec87300 100644
--- a/include/linux/hfi/hfi_ip.h
+++ b/include/linux/hfi/hfi_ip.h
@@ -38,6 +38,7 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
 #include <net/arp.h>
 
 #include <linux/hfi/hfidd_internal.h>
@@ -56,6 +57,12 @@
 #define HF_NAPI_WEIGHT			256
 #define HF_MAX_NAME_LEN			64
 
+/* rfifo intr */
+#define HF_RFIFO_OUT_CNTL_REARM		0	/* 0 to disable interrupt */
+#define HF_IMM_RECV_INTR		0xf0000000	/* bit 32-35 on */
+#define HF_ENA_RECV_INTR		0xc0000000	/* bit 32-33 on */
+#define HF_RECV_INTR_MATCH_SHIFT	7	/* bit 37-56 */
+
 /* sfifo intr: bit 39-55 is threshold */
 /*             bit 34 enable, bit 35 unmask */
 #define HF_SFIFO_INTR_ENABLE		(0x3 << (63 - 35))
@@ -74,11 +81,17 @@
 #define HF_FV_BIT_MAX			31
 #define HF_SEND_ONE			1
 
+#define HF_RFIFO_CACHE_INJ_TH		7ULL
+#define HF_RFIFO_CACHE_INJ_TH_SHIFT	61
+#define HF_RFIFO_OUT_THRESH		0
+
 #define HF_PAYLOAD_MAX			(2048 - HF_IP_HDR_LEN - HF_PROTO_LEN)
 #define HF_NET_MTU			HF_PAYLOAD_MAX
 #define HF_PAYLOAD_RX_THRESHOLD		0x10ULL
 #define HF_PAYLOAD_RX_THRESH_SHIFT	59
 
+#define HF_INC_FSLOT_WATERMARK		(HF_RFIFO_SLOTS >> 3)
+
 struct hfi_ip_extended_hdr {            /* 16B */
 	unsigned int	immediate_len:7;/* In bytes */
 	unsigned int	num_desc:3;     /* number of descriptors */
@@ -99,7 +112,9 @@ struct hfi_ip_with_payload_pkt {
 
 #define HF_IP_HDR_LEN			((sizeof(struct hfi_hdr) + \
 				sizeof(struct hfi_ip_extended_hdr)))
+
 #define HF_ALIGN_PAD			2
+
 #define HF_PROTO_HDR_VERSION		0x1
 /* HFI protocol message type */
 #define	HF_IF_ARP			0xA0
@@ -146,7 +161,10 @@ struct hf_if {
 	u32			sfifo_fv_polarity;
 	u32			sfifo_slots_per_blk;
 	u32			sfifo_packets;
+	u32			rx_pkt_valid;		/* Polarity of recv
+							   packet valid bit */
 	u32			msg_id;
+	u32			rx_fslot_debt;
 	void __iomem		*doorbell;		/* mapped mmio_regs */
 	struct hf_fifo		tx_fifo;
 	struct hf_fifo		rx_fifo;
@@ -159,6 +177,7 @@ struct hf_if {
 /* Private structure for HF inetrface */
 struct hf_net {
 	struct net_device	*netdev;
+	struct napi_struct	napi;
 	struct hf_if		hfif;
 };
 
@@ -172,7 +191,7 @@ struct hf_global_info {
 
 extern struct hf_global_info	hf_ginfo;
 
-#define HF_EVENT_NUM		1
+#define HF_EVENT_NUM		2
 
 struct hf_events_cb {
 	enum hfi_event_type	type;
@@ -182,6 +201,11 @@ struct hf_events_cb {
 #define HF_MAC_HFI_SHIFT	12
 #define HF_HDR_HFI_SHIFT	8
 
+static inline u32 hf_get_mac(u32 w)
+{
+	return ((w >> HF_HDR_HFI_SHIFT) << HF_MAC_HFI_SHIFT) | (w & 0xFF);
+}
+
 static inline u32 hf_get_win(u16 id)
 {
 	return ((id >> HF_MAC_HFI_SHIFT) << HF_HDR_HFI_SHIFT) | (id & 0xFF);
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 20/27] HFI: Close window hypervisor call
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/core/hfidd_hcalls.c |   22 ++++++++++++++
 drivers/net/hfi/core/hfidd_proto.h  |    1 +
 drivers/net/hfi/core/hfidd_window.c |   53 +++++++++++++++++++++++++++++++++--
 3 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hfi/core/hfidd_hcalls.c b/drivers/net/hfi/core/hfidd_hcalls.c
index 1915336..4bc6525 100644
--- a/drivers/net/hfi/core/hfidd_hcalls.c
+++ b/drivers/net/hfi/core/hfidd_hcalls.c
@@ -153,6 +153,17 @@ static inline long long h_hfi_open_window(int token,
 	return rc;
 }
 
+static inline long long h_hfi_close_window(int token,
+		u64 HFI_chip_ID,
+		u64 win_num,
+		u64 flag)
+{
+	return plpar_hcall_norets(token,
+		HFI_chip_ID,
+		win_num,
+		flag);
+}
+
 long long hfi_start_nmmu(u64 chip_id, void *nmmu_info)
 {
 	return h_nmmu_start(H_NMMU_START, chip_id, nmmu_info);
@@ -249,6 +260,17 @@ long long hfi_modify_mr(u64 chip_id, u64 request, u64 mr_handle,
 	return hvrc;
 }
 
+/* Issue the CLOSE WINDOW hcall for window win_id on HFI unit_id;
+ * returns the hypervisor return code unchanged. */
+long long hfi_close_window(u64 unit_id, u64 win_id, u64 flag)
+{
+	long long hvrc;
+
+	hvrc = h_hfi_close_window(H_HFI_CLOSE_WINDOW, unit_id, win_id, flag);
+
+	return hvrc;
+}
+
 long long hfi_free_mr(u64 chip_id, u64 res, u64 mr_handle, u64 sub_region_id)
 {
 	long long	hvrc;
diff --git a/drivers/net/hfi/core/hfidd_proto.h b/drivers/net/hfi/core/hfidd_proto.h
index e065d56..f531dcd 100644
--- a/drivers/net/hfi/core/hfidd_proto.h
+++ b/drivers/net/hfi/core/hfidd_proto.h
@@ -94,6 +94,7 @@ long long hfi_modify_mr(u64 chip_id, u64 request, u64 mr_handle,
 		u64 e_addr,
 		u64 l_addr,
 		u64 num_pg_sz);
+long long hfi_close_window(u64 unit_id, u64 win_id, u64 flag);
 long long hfi_free_mr(u64 chip_id, u64 res, u64 mr_handle,
 		u64 sub_region_id);
 long long hfi_hquery_interface(u64 unit_id, u64 subtype, u64 query_p,
diff --git a/drivers/net/hfi/core/hfidd_window.c b/drivers/net/hfi/core/hfidd_window.c
index 3cfe5c3..fd692eb 100644
--- a/drivers/net/hfi/core/hfidd_window.c
+++ b/drivers/net/hfi/core/hfidd_window.c
@@ -459,6 +459,43 @@ static int hfi_hcall_to_open_window(struct hfidd_acs *p_acs,
 	return 0;
 }
 
+/* Call to CLOSE WINDOW hcall */
+static int hfi_hcall_to_close_window(struct hfidd_acs *p_acs,
+		struct hfidd_window *win_p)
+{
+	int	rc = 0;
+	long long hvrc = 0;
+	u64	start_time = get_jiffies_64();
+
+	hvrc = hfi_close_window(p_acs->dds.hfi_id,
+			win_p->index,
+			H_CLOSE);
+
+	/*
+	 * Need to call CLOSE WINDOW with flag H_CHECK_CLOSED
+	 * to check when the window is completely closed
+	 */
+	while (hvrc == H_BUSY) {
+		hvrc = hfi_close_window(p_acs->dds.hfi_id,
+				win_p->index,
+				H_CHECK_CLOSED);
+		if (hvrc != H_BUSY)
+			break;
+		if (hfidd_age_hcall(start_time))
+			break;
+	}
+
+	if (hvrc != H_SUCCESS) {
+		win_p->state = WIN_FAIL_CLOSE;
+		rc = -EIO;
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfi_hcall_to_close_window: CLOSE WINDOW failed, "
+			"hvrc=0x%llx\n", hvrc);
+	}
+
+	return rc;
+}
+
 /*
  * Map the Effective Address pages for Memory Regions.
  * If more than one page, need to setup a page containing
@@ -1005,7 +1042,7 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 		dev_printk(KERN_ERR, p_acs->hfidd_dev,
 			"hfidd_open_window_func: hfi_map_mmio_regs "
 			"failed, rc = 0x%x\n", rc);
-		goto hfidd_open_window_func_err4;
+		goto hfidd_open_window_func_err5;
 	}
 
 	/* tell user the local ISR id */
@@ -1019,7 +1056,7 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 		dev_printk(KERN_ERR, p_acs->hfidd_dev,
 			"hfidd_open_window_func: hfi_copy_to_user "
 			"failed, rc = 0x%x\n", rc);
-		goto hfidd_open_window_func_err5;
+		goto hfidd_open_window_func_err6;
 	}
 
 	spin_lock(&(win_p->win_lock));
@@ -1031,9 +1068,11 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 	kfree(local_p);
 	return rc;
 
-hfidd_open_window_func_err5:
+hfidd_open_window_func_err6:
 	if (is_userspace)
 		hfidd_unmap(local_p->mmio_regs.use.kptr, PAGE_SIZE_64K);
+hfidd_open_window_func_err5:
+	hfi_hcall_to_close_window(p_acs, win_p);
 hfidd_open_window_func_err4:
 	hfi_destroy_window_parm(p_acs, is_userspace, win_p, local_p);
 hfidd_open_window_func_err3:
@@ -1103,6 +1142,14 @@ int hfidd_close_window_internal(struct hfidd_acs *p_acs,
 		goto hfidd_close_window_internal_err0;
 	}
 
+	rc = hfi_hcall_to_close_window(p_acs, win_p);
+	if (rc) {
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfidd_close_window_internal: hfi_hcall_to_close_window "
+			"failed, rc = 0x%x\n", rc);
+		goto hfidd_close_window_internal_err0;
+	}
+
 	hfi_destroy_window_info(p_acs, win_p);
 
 	/* Call hcall to unregister MR in the MMU */
-- 
1.7.3.5


^ permalink raw reply related

* Re: [Bugme-new] [Bug 33902] New: tcpi_state field in tcp_info structure reports TCP_CLOSE instead of TCP_TIME_WAIT state
From: Andrew Morton @ 2011-04-25 21:34 UTC (permalink / raw)
  To: netdev; +Cc: bugzilla-daemon, bugme-daemon, Dmitry.Izbitsky
In-Reply-To: <bug-33902-10286@https.bugzilla.kernel.org/>


(switched to email.  Please respond via emailed reply-to-all, not via the
bugzilla web interface).

On Mon, 25 Apr 2011 08:08:36 GMT
bugzilla-daemon@bugzilla.kernel.org wrote:

> https://bugzilla.kernel.org/show_bug.cgi?id=33902
> 
>            Summary: tcpi_state field in tcp_info structure reports
>                     TCP_CLOSE instead of TCP_TIME_WAIT state
>            Product: Networking
>            Version: 2.5
>     Kernel Version: 2.6.38
>           Platform: All
>         OS/Version: Linux
>               Tree: Mainline
>             Status: NEW
>           Severity: normal
>           Priority: P1
>          Component: IPV4
>         AssignedTo: shemminger@linux-foundation.org
>         ReportedBy: Dmitry.Izbitsky@oktetlabs.ru
>         Regression: No
> 
> 
> Setup - TCP connection in ESTABLISHED state. Local socket calls
> shutdown(SHUT_RDWR). After that peer calls shutdown(SHUT_RDWR).
> 
> Local socket should now be in TIME_WAIT state (from specification point 
> of view). And it's indeed in TIME_WAIT (TCP_TIME_WAIT) state if we look at 
> /proc/net/tcp (or netstat -t). However, if one tries to get connection state
> via tcp_info (getsockopt(TCP_INFO)) the reported state is CLOSED (TCP_CLOSE).
> 
> Looks like the problem is in tcp_time_wait() function
> (net/ipv4/tcp_minisocks.c).
> It's called with state=TCP_TIME_WAIT, and sets inet_timewait_sock
> *tw->tw_state field to TCP_TIME_WAIT. That's why the state is reported
> correctly when looking into /proc. However, at the end it calls tcp_done(sk),
> which itself calls tcp_set_state(TCP_CLOSE), so sk->sk_state is set to
> TCP_CLOSE instead of TCP_TIME_WAIT. And it's reported this way via TCP_INFO
> socket option.
> 
> Problem is reproduced on 2.6.26, 2.6.38 and is probably observed on earlier
> kernels.


^ permalink raw reply

* [RFC PATCH] netlink: Increase netlink dump skb message size
From: Greg Rose @ 2011-04-25 22:01 UTC (permalink / raw)
  To: netdev; +Cc: bhutchings, davem

The message size allocated for rtnl info dumps was limited to a single page.
This is not enough for additional interface info available with devices
that support SR-IOV.  Check that the amount of data allocated is sufficient
for the amount of data requested.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
---

 include/linux/rtnetlink.h |    1 +
 net/core/rtnetlink.c      |    6 ++++++
 net/netlink/af_netlink.c  |   37 +++++++++++++++++++++++++++++++------
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index bbad657..d1ff937 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -622,6 +622,7 @@ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
 			      u32 id, u32 ts, u32 tsage, long expires,
 			      u32 error);
+extern size_t rtnl_get_nlmsg_size(const struct net_device *dev);
 
 extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4..001c947 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -764,6 +764,12 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
 	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
 }
 
+size_t rtnl_get_nlmsg_size(const struct net_device *dev)
+{
+	return if_nlmsg_size(dev);
+}
+EXPORT_SYMBOL(rtnl_get_nlmsg_size);
+
 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
 {
 	struct nlattr *vf_ports;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c8f35b5..5b1106c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1664,23 +1664,48 @@ static void netlink_destroy_callback(struct netlink_callback *cb)
 static int netlink_dump(struct sock *sk)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	struct net *net = sock_net(sk);
 	struct netlink_callback *cb;
 	struct sk_buff *skb;
 	struct nlmsghdr *nlh;
+	struct net_device *dev;
+	struct hlist_head *head;
+	struct hlist_node *node;
 	int len, err = -ENOBUFS;
-
-	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
-	if (!skb)
-		goto errout;
+	int h, s_h;
+	int idx = 0, s_idx;
+	size_t alloc_size = NLMSG_GOODSIZE;
 
 	mutex_lock(nlk->cb_mutex);
 
 	cb = nlk->cb;
 	if (cb == NULL) {
 		err = -EINVAL;
-		goto errout_skb;
+		goto errout;
 	}
 
+	s_h = cb->args[0];
+	s_idx = cb->args[1];
+
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry(dev, node, head, index_hlist) {
+			if (idx < s_idx) {
+				idx++;
+				continue;
+			}
+			alloc_size = rtnl_get_nlmsg_size(dev);
+			if (alloc_size < NLMSG_GOODSIZE)
+				alloc_size = NLMSG_GOODSIZE;
+			break;
+		}
+	}
+
+	skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
+	if (!skb)
+		goto errout;
+
 	len = cb->dump(skb, cb);
 
 	if (len > 0) {
@@ -1717,9 +1742,9 @@ static int netlink_dump(struct sock *sk)
 	return 0;
 
 errout_skb:
-	mutex_unlock(nlk->cb_mutex);
 	kfree_skb(skb);
 errout:
+	mutex_unlock(nlk->cb_mutex);
 	return err;
 }
 


^ permalink raw reply related

* [PATCH net-next 1/6] tg3: Fix int generation hw bug for 5719 / 5720
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

On the 5719 and 5720, there is a bug where the hardware will
misinterpret a status tag update and leave interrupts permanently
disabled.  This patch enables a hardware fix that works around the
issue.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |    3 +++
 drivers/net/tg3.h |    1 +
 2 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 693f36e..a72d031 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -8198,6 +8198,9 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 		      ~DMA_RWCTRL_DIS_CACHE_ALIGNMENT;
 		if (tp->pci_chip_rev_id == CHIPREV_ID_57765_A0)
 			val &= ~DMA_RWCTRL_CRDRDR_RDMA_MRRS_MSK;
+		if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_57765 &&
+		    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5717)
+			val |= DMA_RWCTRL_TAGGED_STAT_WA;
 		tw32(TG3PCI_DMA_RW_CTRL, val | tp->dma_rwctrl);
 	} else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5784 &&
 		   GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5761) {
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index eaa7669..6f37d2a 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -188,6 +188,7 @@
 #define   METAL_REV_B2			 0x02
 #define TG3PCI_DMA_RW_CTRL		0x0000006c
 #define  DMA_RWCTRL_DIS_CACHE_ALIGNMENT  0x00000001
+#define  DMA_RWCTRL_TAGGED_STAT_WA	 0x00000080
 #define  DMA_RWCTRL_CRDRDR_RDMA_MRRS_MSK 0x00000380
 #define  DMA_RWCTRL_READ_BNDRY_MASK	 0x00000700
 #define  DMA_RWCTRL_READ_BNDRY_DISAB	 0x00000000
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 5/6] tg3: Whitespace cleanups
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch gets rid of some harmless whitespace errors.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |    9 ++-------
 1 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 6bc43ed..696be59 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -7666,8 +7666,6 @@ static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp)
 	return 0;
 }
 
-/* 5705 needs a special version of the TSO firmware.  */
-
 /* tp->lock is held. */
 static int tg3_load_tso_firmware(struct tg3 *tp)
 {
@@ -10179,7 +10177,6 @@ static int tg3_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 		tp->tg3_flags &= ~TG3_FLAG_WOL_ENABLE;
 	spin_unlock_bh(&tp->lock);
 
-
 	return 0;
 }
 
@@ -12925,7 +12922,7 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp)
 done:
 	if (tp->tg3_flags & TG3_FLAG_WOL_CAP)
 		device_set_wakeup_enable(&tp->pdev->dev,
-				 tp->tg3_flags & TG3_FLAG_WOL_ENABLE);
+					 tp->tg3_flags & TG3_FLAG_WOL_ENABLE);
 	else
 		device_set_wakeup_capable(&tp->pdev->dev, false);
 }
@@ -13749,7 +13746,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	    (tp->tg3_flags2 & TG3_FLG2_5780_CLASS))
 		tp->tg3_flags2 |= TG3_FLG2_5750_PLUS;
 
-
 	if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) ||
 	    (tp->tg3_flags2 & TG3_FLG2_5750_PLUS))
 		tp->tg3_flags2 |= TG3_FLG2_5705_PLUS;
@@ -14034,7 +14030,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	    (tp->tg3_flags3 & TG3_FLG3_57765_PLUS))
 		tp->tg3_flags |= TG3_FLAG_CPMU_PRESENT;
 
-	/* Set up tp->grc_local_ctrl before calling tg_power_up().
+	/* Set up tp->grc_local_ctrl before calling tg3_power_up().
 	 * GPIO1 driven high will bring 5700's external PHY out of reset.
 	 * It is also used as eeprom write protect on LOMs.
 	 */
@@ -14829,7 +14825,6 @@ static int __devinit tg3_test_dma(struct tg3 *tp)
 	}
 	if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) !=
 	    DMA_RWCTRL_WRITE_BNDRY_16) {
-
 		/* DMA test passed without adjusting DMA boundary,
 		 * now look for chipsets that are known to expose the
 		 * DMA bug without failing the test.
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 3/6] tg3: Add TSO loopback test
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson, Benjamin Li, Michael Chan

This patch adds code to exercise the TSO portion of the device through
a phy loopback test.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Benjamin Li <benli@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |  158 +++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 123 insertions(+), 35 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 88cd231..fb2139a 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -11076,11 +11076,35 @@ static int tg3_test_memory(struct tg3 *tp)
 
 #define TG3_MAC_LOOPBACK	0
 #define TG3_PHY_LOOPBACK	1
+#define TG3_TSO_LOOPBACK	2
+
+#define TG3_TSO_MSS		500
+
+#define TG3_TSO_IP_HDR_LEN	20
+#define TG3_TSO_TCP_HDR_LEN	20
+#define TG3_TSO_TCP_OPT_LEN	12
+
+static const u8 tg3_tso_header[] = {
+0x08, 0x00,
+0x45, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x40, 0x00,
+0x40, 0x06, 0x00, 0x00,
+0x0a, 0x00, 0x00, 0x01,
+0x0a, 0x00, 0x00, 0x02,
+0x0d, 0x00, 0xe0, 0x00,
+0x00, 0x00, 0x01, 0x00,
+0x00, 0x00, 0x02, 0x00,
+0x80, 0x10, 0x10, 0x00,
+0x14, 0x09, 0x00, 0x00,
+0x01, 0x01, 0x08, 0x0a,
+0x11, 0x11, 0x11, 0x11,
+0x11, 0x11, 0x11, 0x11,
+};
 
 static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 {
 	u32 mac_mode, rx_start_idx, rx_idx, tx_idx, opaque_key;
-	u32 desc_idx, coal_now;
+	u32 base_flags = 0, mss = 0, desc_idx, coal_now, data_off, val;
 	struct sk_buff *skb, *rx_skb;
 	u8 *tx_data;
 	dma_addr_t map;
@@ -11119,9 +11143,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 		else
 			mac_mode |= MAC_MODE_PORT_MODE_GMII;
 		tw32(MAC_MODE, mac_mode);
-	} else if (loopback_mode == TG3_PHY_LOOPBACK) {
-		u32 val;
-
+	} else {
 		if (tp->phy_flags & TG3_PHYFLG_IS_FET) {
 			tg3_phy_fet_toggle_apd(tp, false);
 			val = BMCR_LOOPBACK | BMCR_FULLDPLX | BMCR_SPEED100;
@@ -11169,8 +11191,6 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 				break;
 			mdelay(1);
 		}
-	} else {
-		return -EINVAL;
 	}
 
 	err = -EIO;
@@ -11186,7 +11206,54 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 
 	tw32(MAC_RX_MTU_SIZE, tx_len + ETH_FCS_LEN);
 
-	for (i = 14; i < tx_len; i++)
+	if (loopback_mode == TG3_TSO_LOOPBACK) {
+		struct iphdr *iph = (struct iphdr *)&tx_data[ETH_HLEN];
+
+		u32 hdr_len = TG3_TSO_IP_HDR_LEN + TG3_TSO_TCP_HDR_LEN +
+			      TG3_TSO_TCP_OPT_LEN;
+
+		memcpy(tx_data + ETH_ALEN * 2, tg3_tso_header,
+		       sizeof(tg3_tso_header));
+		mss = TG3_TSO_MSS;
+
+		val = tx_len - ETH_ALEN * 2 - sizeof(tg3_tso_header);
+		num_pkts = DIV_ROUND_UP(val, TG3_TSO_MSS);
+
+		/* Set the total length field in the IP header */
+		iph->tot_len = htons((u16)(mss + hdr_len));
+
+		base_flags = (TXD_FLAG_CPU_PRE_DMA |
+			      TXD_FLAG_CPU_POST_DMA);
+
+		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
+			struct tcphdr *th;
+			val = ETH_HLEN + TG3_TSO_IP_HDR_LEN;
+			th = (struct tcphdr *)&tx_data[val];
+			th->check = 0;
+		} else
+			base_flags |= TXD_FLAG_TCPUDP_CSUM;
+
+		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_3) {
+			mss |= (hdr_len & 0xc) << 12;
+			if (hdr_len & 0x10)
+				base_flags |= 0x00000010;
+			base_flags |= (hdr_len & 0x3e0) << 5;
+		} else if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_2)
+			mss |= hdr_len << 9;
+		else if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO_1) ||
+			 GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) {
+			mss |= (TG3_TSO_TCP_OPT_LEN << 9);
+		} else {
+			base_flags |= (TG3_TSO_TCP_OPT_LEN << 10);
+		}
+
+		data_off = ETH_ALEN * 2 + sizeof(tg3_tso_header);
+	} else {
+		num_pkts = 1;
+		data_off = ETH_HLEN;
+	}
+
+	for (i = data_off; i < tx_len; i++)
 		tx_data[i] = (u8) (i & 0xff);
 
 	map = pci_map_single(tp->pdev, skb->data, tx_len, PCI_DMA_TODEVICE);
@@ -11202,12 +11269,10 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 
 	rx_start_idx = rnapi->hw_status->idx[0].rx_producer;
 
-	num_pkts = 0;
-
-	tg3_set_txd(tnapi, tnapi->tx_prod, map, tx_len, 0, 1);
+	tg3_set_txd(tnapi, tnapi->tx_prod, map, tx_len,
+		    base_flags, (mss << 1) | 1);
 
 	tnapi->tx_prod++;
-	num_pkts++;
 
 	tw32_tx_mbox(tnapi->prodmbox, tnapi->tx_prod);
 	tr32_mailbox(tnapi->prodmbox);
@@ -11237,38 +11302,56 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 	if (rx_idx != rx_start_idx + num_pkts)
 		goto out;
 
-	desc = &rnapi->rx_rcb[rx_start_idx];
-	desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK;
-	opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK;
+	val = data_off;
+	while (rx_idx != rx_start_idx) {
+		desc = &rnapi->rx_rcb[rx_start_idx++];
+		desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK;
+		opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK;
 
-	if ((desc->err_vlan & RXD_ERR_MASK) != 0 &&
-	    (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII))
-		goto out;
+		if ((desc->err_vlan & RXD_ERR_MASK) != 0 &&
+		    (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII))
+			goto out;
 
-	rx_len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4;
-	if (rx_len != tx_len)
-		goto out;
+		rx_len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT)
+			 - ETH_FCS_LEN;
 
-	if (pktsz <= TG3_RX_STD_DMA_SZ - ETH_FCS_LEN) {
-		if (opaque_key != RXD_OPAQUE_RING_STD)
-			goto out;
+		if (loopback_mode != TG3_TSO_LOOPBACK) {
+			if (rx_len != tx_len)
+				goto out;
 
-		rx_skb = tpr->rx_std_buffers[desc_idx].skb;
-		map = dma_unmap_addr(&tpr->rx_std_buffers[desc_idx], mapping);
-	} else {
-		if (opaque_key != RXD_OPAQUE_RING_JUMBO)
+			if (pktsz <= TG3_RX_STD_DMA_SZ - ETH_FCS_LEN) {
+				if (opaque_key != RXD_OPAQUE_RING_STD)
+					goto out;
+			} else {
+				if (opaque_key != RXD_OPAQUE_RING_JUMBO)
+					goto out;
+			}
+		} else if ((desc->type_flags & RXD_FLAG_TCPUDP_CSUM) &&
+			   (desc->ip_tcp_csum & RXD_TCPCSUM_MASK)
+			    >> RXD_TCPCSUM_SHIFT == 0xffff) {
 			goto out;
+		}
 
-		rx_skb = tpr->rx_jmb_buffers[desc_idx].skb;
-		map = dma_unmap_addr(&tpr->rx_jmb_buffers[desc_idx], mapping);
-	}
+		if (opaque_key == RXD_OPAQUE_RING_STD) {
+			rx_skb = tpr->rx_std_buffers[desc_idx].skb;
+			map = dma_unmap_addr(&tpr->rx_std_buffers[desc_idx],
+					     mapping);
+		} else if (opaque_key == RXD_OPAQUE_RING_JUMBO) {
+			rx_skb = tpr->rx_jmb_buffers[desc_idx].skb;
+			map = dma_unmap_addr(&tpr->rx_jmb_buffers[desc_idx],
+					     mapping);
+		} else
+			goto out;
 
-	pci_dma_sync_single_for_cpu(tp->pdev, map, rx_len, PCI_DMA_FROMDEVICE);
+		pci_dma_sync_single_for_cpu(tp->pdev, map, rx_len,
+					    PCI_DMA_FROMDEVICE);
 
-	for (i = 14; i < tx_len; i++) {
-		if (*(rx_skb->data + i) != (u8) (i & 0xff))
-			goto out;
+		for (i = data_off; i < rx_len; i++, val++) {
+			if (*(rx_skb->data + i) != (u8) (val & 0xff))
+				goto out;
+		}
 	}
+
 	err = 0;
 
 	/* tg3_free_rings will unmap and free the rx_skb */
@@ -11278,10 +11361,11 @@ out:
 
 #define TG3_STD_LOOPBACK_FAILED		1
 #define TG3_JMB_LOOPBACK_FAILED		2
+#define TG3_TSO_LOOPBACK_FAILED		4
 
 #define TG3_MAC_LOOPBACK_SHIFT		0
 #define TG3_PHY_LOOPBACK_SHIFT		4
-#define TG3_LOOPBACK_FAILED			0x00000033
+#define TG3_LOOPBACK_FAILED		0x00000077
 
 static int tg3_test_loopback(struct tg3 *tp)
 {
@@ -11358,6 +11442,10 @@ static int tg3_test_loopback(struct tg3 *tp)
 		if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_PHY_LOOPBACK))
 			err |= TG3_STD_LOOPBACK_FAILED <<
 			       TG3_PHY_LOOPBACK_SHIFT;
+		if ((tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) &&
+		    tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_TSO_LOOPBACK))
+			err |= TG3_TSO_LOOPBACK_FAILED <<
+			       TG3_PHY_LOOPBACK_SHIFT;
 		if ((tp->tg3_flags & TG3_FLAG_JUMBO_RING_ENABLE) &&
 		    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_PHY_LOOPBACK))
 			err |= TG3_JMB_LOOPBACK_FAILED <<
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 4/6] tg3: Add EEH support
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch adds EEH support to the tg3 driver.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |  147 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 147 insertions(+), 0 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index fb2139a..6bc43ed 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -15395,6 +15395,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 		    pdev->dma_mask == DMA_BIT_MASK(32) ? 32 :
 		    ((u64)pdev->dma_mask) == DMA_BIT_MASK(40) ? 40 : 64);
 
+	pci_save_state(pdev);
+
 	return 0;
 
 err_out_apeunmap:
@@ -15551,11 +15553,156 @@ static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume);
 
 #endif /* CONFIG_PM_SLEEP */
 
+/**
+ * tg3_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev,
+					      pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct tg3 *tp = netdev_priv(netdev);
+	pci_ers_result_t err = PCI_ERS_RESULT_NEED_RESET;
+
+	netdev_info(netdev, "PCI I/O error detected\n");
+
+	rtnl_lock();
+
+	if (!netif_running(netdev))
+		goto done;
+
+	tg3_phy_stop(tp);
+
+	tg3_netif_stop(tp);
+
+	del_timer_sync(&tp->timer);
+	tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER;
+
+	/* Want to make sure that the reset task doesn't run */
+	cancel_work_sync(&tp->reset_task);
+	tp->tg3_flags  &= ~TG3_FLAG_TX_RECOVERY_PENDING;
+	tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER;
+
+	netif_device_detach(netdev);
+
+	/* Clean up software state, even if MMIO is blocked */
+	tg3_full_lock(tp, 0);
+	tg3_halt(tp, RESET_KIND_SHUTDOWN, 0);
+	tg3_full_unlock(tp);
+
+done:
+	if (state == pci_channel_io_perm_failure)
+		err = PCI_ERS_RESULT_DISCONNECT;
+	else
+		pci_disable_device(pdev);
+
+	rtnl_unlock();
+
+	return err;
+}
+
+/**
+ * tg3_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot.
+ * At this point, the card has experienced a hard reset,
+ * followed by fixups by BIOS, and has its config space
+ * set up identically to what it was at cold boot.
+ */
+static pci_ers_result_t tg3_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct tg3 *tp = netdev_priv(netdev);
+	pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
+	int err;
+
+	rtnl_lock();
+
+	if (pci_enable_device(pdev)) {
+		netdev_err(netdev, "Cannot re-enable PCI device after reset.\n");
+		goto done;
+	}
+
+	pci_set_master(pdev);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+
+	if (!netif_running(netdev)) {
+		rc = PCI_ERS_RESULT_RECOVERED;
+		goto done;
+	}
+
+	err = tg3_power_up(tp);
+	if (err) {
+		netdev_err(netdev, "Failed to restore register access.\n");
+		goto done;
+	}
+
+	rc = PCI_ERS_RESULT_RECOVERED;
+
+done:
+	rtnl_unlock();
+
+	return rc;
+}
+
+/**
+ * tg3_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells
+ * us that it's OK to resume normal operation.
+ */
+static void tg3_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct tg3 *tp = netdev_priv(netdev);
+	int err;
+
+	rtnl_lock();
+
+	if (!netif_running(netdev))
+		goto done;
+
+	tg3_full_lock(tp, 0);
+	tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
+	err = tg3_restart_hw(tp, 1);
+	tg3_full_unlock(tp);
+	if (err) {
+		netdev_err(netdev, "Cannot restart hardware after reset.\n");
+		goto done;
+	}
+
+	netif_device_attach(netdev);
+
+	tp->timer.expires = jiffies + tp->timer_offset;
+	add_timer(&tp->timer);
+
+	tg3_netif_start(tp);
+
+	tg3_phy_start(tp);
+
+done:
+	rtnl_unlock();
+}
+
+static struct pci_error_handlers tg3_err_handler = {
+	.error_detected	= tg3_io_error_detected,
+	.slot_reset	= tg3_io_slot_reset,
+	.resume		= tg3_io_resume
+};
+
 static struct pci_driver tg3_driver = {
 	.name		= DRV_MODULE_NAME,
 	.id_table	= tg3_pci_tbl,
 	.probe		= tg3_init_one,
 	.remove		= __devexit_p(tg3_remove_one),
+	.err_handler	= &tg3_err_handler,
 	.driver.pm	= TG3_PM_OPS,
 };
 
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 2/6] tg3: Organize loopback test failure flags
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

As more test modes are added to each loopback mode, the need to
organise the results increases.  This patch groups the results by
loopback mode, and then by test mode.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index a72d031..88cd231 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -11276,10 +11276,12 @@ out:
 	return err;
 }
 
-#define TG3_MAC_LOOPBACK_FAILED		1
-#define TG3_PHY_LOOPBACK_FAILED		2
-#define TG3_LOOPBACK_FAILED		(TG3_MAC_LOOPBACK_FAILED |	\
-					 TG3_PHY_LOOPBACK_FAILED)
+#define TG3_STD_LOOPBACK_FAILED		1
+#define TG3_JMB_LOOPBACK_FAILED		2
+
+#define TG3_MAC_LOOPBACK_SHIFT		0
+#define TG3_PHY_LOOPBACK_SHIFT		4
+#define TG3_LOOPBACK_FAILED			0x00000033
 
 static int tg3_test_loopback(struct tg3 *tp)
 {
@@ -11338,11 +11340,11 @@ static int tg3_test_loopback(struct tg3 *tp)
 	}
 
 	if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_MAC_LOOPBACK))
-		err |= TG3_MAC_LOOPBACK_FAILED;
+		err |= TG3_STD_LOOPBACK_FAILED << TG3_MAC_LOOPBACK_SHIFT;
 
 	if ((tp->tg3_flags & TG3_FLAG_JUMBO_RING_ENABLE) &&
 	    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_MAC_LOOPBACK))
-		err |= (TG3_MAC_LOOPBACK_FAILED << 2);
+		err |= TG3_JMB_LOOPBACK_FAILED << TG3_MAC_LOOPBACK_SHIFT;
 
 	if (tp->tg3_flags & TG3_FLAG_CPMU_PRESENT) {
 		tw32(TG3_CPMU_CTRL, cpmuctrl);
@@ -11354,10 +11356,12 @@ static int tg3_test_loopback(struct tg3 *tp)
 	if (!(tp->phy_flags & TG3_PHYFLG_PHY_SERDES) &&
 	    !(tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB)) {
 		if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_PHY_LOOPBACK))
-			err |= TG3_PHY_LOOPBACK_FAILED;
+			err |= TG3_STD_LOOPBACK_FAILED <<
+			       TG3_PHY_LOOPBACK_SHIFT;
 		if ((tp->tg3_flags & TG3_FLAG_JUMBO_RING_ENABLE) &&
 		    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_PHY_LOOPBACK))
-			err |= (TG3_PHY_LOOPBACK_FAILED << 2);
+			err |= TG3_JMB_LOOPBACK_FAILED <<
+			       TG3_PHY_LOOPBACK_SHIFT;
 	}
 
 	/* Re-enable gphy autopowerdown. */
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 6/6] tg3: Update version to 3.118
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch updates the tg3 version to 3.118.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 696be59..b20538a 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -64,10 +64,10 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define TG3_MAJ_NUM			3
-#define TG3_MIN_NUM			117
+#define TG3_MIN_NUM			118
 #define DRV_MODULE_VERSION	\
 	__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
-#define DRV_MODULE_RELDATE	"January 25, 2011"
+#define DRV_MODULE_RELDATE	"April 22, 2011"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 0/6] tg3: TSO loopback and EEH support
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patchset implements TSO loopback support in the selftest.  It also
adds EEH support.



^ permalink raw reply

* Re: [PATCH net-next-2.6 v5 3/5] sctp: Add socket option operation for Auto-ASCONF
From: Wei Yongjun @ 2011-04-26  0:35 UTC (permalink / raw)
  To: Michio Honda; +Cc: netdev, lksctp-developers
In-Reply-To: <0A966B15-C985-40AC-9402-CAE9BA7F2AC2@sfc.wide.ad.jp>

Hi, Michio

> This patch allows the application to operate Auto-ASCONF on/off behavior via setsockopt() and getsockopt().  

You should update your net-next tree first, and then
create the patch based on the latest source code.

> +#define SCTP_AUTO_ASCONF       29

29 has already been assigned to another socket option.

>  
>

^ permalink raw reply

* Re: [PATCH] netfilter/IPv6:  initialize TOS field in REJECT target module
From: Pablo Neira Ayuso @ 2011-04-26  1:13 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Fernando Luis Vazquez Cao, netfilter-devel, netdev, yoshfuji,
	jengelh, davem
In-Reply-To: <1303461461.3134.15.camel@edumazet-laptop>

On 22/04/11 10:37, Eric Dumazet wrote:
> Le vendredi 22 avril 2011 à 17:11 +0900, Fernando Luis Vazquez Cao a
> écrit :
> 
>> Thank you!
>>
>> Should we send these two patches to -stable too?
> 
> David takes care of stable submissions for netdev stuff, thanks.

If the patch follows the netfilter path, we'll take care of sending
stable submissions.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] netfilter/IPv6:  initialize TOS field in REJECT target module
From: Fernando Luis Vazquez Cao @ 2011-04-26  1:26 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Eric Dumazet, netfilter-devel, netdev, yoshfuji, jengelh, davem
In-Reply-To: <4DB61C2C.7060508@netfilter.org>

On Tue, 2011-04-26 at 03:13 +0200, Pablo Neira Ayuso wrote:
> On 22/04/11 10:37, Eric Dumazet wrote:
> > Le vendredi 22 avril 2011 à 17:11 +0900, Fernando Luis Vazquez Cao a
> > écrit :
> > 
> >> Thank you!
> >>
> >> Should we send these two patches to -stable too?
> > 
> > David takes care of stable submissions for netdev stuff, thanks.
> 
> If the patch follows the netfilter path, we'll take care of sending
> stable submissions.

David, will you take care of these two patches or should they go through
the netfilter tree?

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* how to set vlan filter for intel 82599
From: zhou rui @ 2011-04-26  2:19 UTC (permalink / raw)
  To: netdev

hi
Here is the problem that troubles me: how do I set a VLAN filter for the Intel
82599? For example,
I want VLAN IDs 0~31 to go to queue 0, VLAN IDs 32-63 to go to queue
1, and so on. Below are my settings, but they don't work.

I don't know the exact meaning of vlan-mask and vlan; how are they calculated?

./ethtool -K eth5 ntuple on

./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0000 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 0
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0020 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 1
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0040 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 2
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0060 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 3
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0080 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 4
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x00A0 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 5
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x00C0 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 6
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x00E0 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 6

thanks
rui

^ permalink raw reply

* Re: how to set vlan filter for intel 82599
From: Ben Hutchings @ 2011-04-26  2:57 UTC (permalink / raw)
  To: zhou rui; +Cc: netdev
In-Reply-To: <BANLkTikZcdda5-3NBaXyKmij09f62Da3cQ@mail.gmail.com>

On Tue, 2011-04-26 at 10:19 +0800, zhou rui wrote:
> hi
> here is the problem troubles me,how to set vlan filter for intel
> 82599? for example
> I want vlan id 0~31 will go to queue 0, vlan id 32-63 will go to queue
> 1...below is my setting,but doesn't work
> 
> don't know the exact meanning of the vlan-mask and vlan,how are they calculated?
> 
> ./ethtool -K eth5 ntuple on
> 
> ./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
> dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
> dst-port-mask 0x0 vlan 0x0000 vlan-mask 0x00E0 user-def 0x0
> user-def-mask 0x0 action 0
[...]

This specifies a filter for UDP/IPv4 packets, and the masks are wrong.
If you actually wanted to filter only UDP/IPv4 packets for VID 0-31 then
the correct syntax would be:

    ethtool -U eth5 flow-type udp4 vlan 0 vlan-mask 0xf01f

If you don't care about the layer 3/4 protocols then you would need to
use 'flow-type ether', but no driver implements that yet.  (Well, sfc
implements the *type*, but not filtering by VID only.)

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [PATCH net-next-2.6 v5 5/5] sctp: Add ASCONF operation on the single-homed host
From: Wei Yongjun @ 2011-04-26  3:28 UTC (permalink / raw)
  To: Michio Honda; +Cc: netdev, lksctp-developers
In-Reply-To: <0D29144A-E384-4E7D-AA04-4CC330A2D3AF@sfc.wide.ad.jp>


> SCTP can change the IP address on the single-homed host.  
> In this case, the SCTP association transmits an ASCONF packet including addition of the new IP address and deletion of the old address.  This patch implements this functionality.  
> In this case, the ASCONF chunk is added to the beginning of the queue, because the other chunks cannot be transmitted in this state.  
>
> Signed-off-by: Michio Honda <micchie@sfc.wide.ad.jp>
> ---
> diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
> index c70d8cc..d7a4ee3 100644
> --- a/include/net/sctp/constants.h
> +++ b/include/net/sctp/constants.h
> @@ -441,4 +441,8 @@ enum {
>   */
>  #define SCTP_AUTH_RANDOM_LENGTH 32
>  
> +/* ASCONF PARAMETERS */
> +#define SCTP_ASCONF_V4_PARAM_LEN 16
> +#define SCTP_ASCONF_V6_PARAM_LEN 28

useless defines.

> +
>  #endif /* __sctp_constants_h__ */
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index cc9185c..db4e9d0 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -1901,6 +1901,8 @@ struct sctp_association {
>  	 * after reaching 4294967295.
>  	 */
>  	__u32 addip_serial;
> +	union sctp_addr *asconf_addr_del_pending;
> +	int src_out_of_asoc_ok;
>  
>  	/* SCTP AUTH: list of the endpoint shared keys.  These
>  	 * keys are provided out of band by the user applicaton
> diff --git a/net/sctp/associola.c b/net/sctp/associola.c
> index 6b04287..2082d0a 100644
> --- a/net/sctp/associola.c
> +++ b/net/sctp/associola.c
> @@ -279,6 +279,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
>  	asoc->peer.asconf_capable = 0;
>  	if (sctp_addip_noauth)
>  		asoc->peer.asconf_capable = 1;
> +	asoc->asconf_addr_del_pending = NULL;
> +	asoc->src_out_of_asoc_ok = 0;
>  
>  	/* Create an input queue.  */
>  	sctp_inq_init(&asoc->base.inqueue);
> @@ -443,6 +445,10 @@ void sctp_association_free(struct sctp_association *asoc)
>  
>  	asoc->peer.transport_count = 0;
>  
> +	/* Free pending address space being deleted */
> +	if (asoc->asconf_addr_del_pending != NULL)
> +		kfree(asoc->asconf_addr_del_pending);
> +
>  	/* Free any cached ASCONF_ACK chunk. */
>  	sctp_assoc_free_asconf_acks(asoc);
>  
> diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
> index 865ce7b..56c97ce 100644
> --- a/net/sctp/ipv6.c
> +++ b/net/sctp/ipv6.c
> @@ -332,6 +332,13 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
>  				matchlen = bmatchlen;
>  			}
>  		}
> +		if (laddr->state == SCTP_ADDR_NEW && asoc->src_out_of_asoc_ok) {
> +			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
> +			if (!baddr || (matchlen < bmatchlen)) {
> +				baddr = &laddr->a;
> +				matchlen = bmatchlen;
> +			}
> +		}
>  	}
>  
>  	if (baddr) {
> diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
> index 26dc005..28bccde 100644
> --- a/net/sctp/outqueue.c
> +++ b/net/sctp/outqueue.c
> @@ -744,6 +744,16 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
>  	 */
>  
>  	list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
> +		/* RFC 5061, 5.3
> +		 * F1) This means that until such time as the ASCONF
> +		 * containing the add is acknowledged, the sender MUST
> +		 * NOT use the new IP address as a source for ANY SCTP
> +		 * packet except on carrying an ASCONF Chunk.
> +		 */
> +		if (asoc->src_out_of_asoc_ok &&
> +		    chunk->chunk_hdr->type != SCTP_CID_ASCONF)
> +			continue;
> +
>  		list_del_init(&chunk->list);
>  
>  		/* Pick the right transport to use. */
> @@ -871,6 +881,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
>  		}
>  	}
>  
> +	if (q->asoc->src_out_of_asoc_ok)
> +		goto sctp_flush_out;
> +
>  	/* Is it OK to send data chunks?  */
>  	switch (asoc->state) {
>  	case SCTP_STATE_COOKIE_ECHOED:
> diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> index 152976e..0733273 100644
> --- a/net/sctp/protocol.c
> +++ b/net/sctp/protocol.c
> @@ -510,7 +510,9 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
>  		sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
>  		rcu_read_lock();
>  		list_for_each_entry_rcu(laddr, &bp->address_list, list) {
> -			if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
> +			if (!laddr->valid || (laddr->state == SCTP_ADDR_DEL) ||
> +			    (laddr->state != SCTP_ADDR_SRC &&
> +			    !asoc->src_out_of_asoc_ok))
>  				continue;
>  			if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
>  				goto out_unlock;
> diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
> index de98665..f341ab2 100644
> --- a/net/sctp/sm_make_chunk.c
> +++ b/net/sctp/sm_make_chunk.c
> @@ -2744,6 +2744,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
>  	int			addr_param_len = 0;
>  	int 			totallen = 0;
>  	int 			i;
> +	sctp_addip_param_t del_param; /* 8 Bytes (Type 0xC002, Len and CrrID) */
> +	struct sctp_af *del_af;
> +	int del_addr_param_len = 0;
> +	int del_paramlen = sizeof(sctp_addip_param_t);
> +	union sctp_addr_param del_addr_param; /* (v4) 8 Bytes, (v6) 20 Bytes */
> +	int			del_pickup = 0;
>  
>  	/* Get total length of all the address parameters. */
>  	addr_buf = addrs;
> @@ -2756,6 +2762,17 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
>  		totallen += addr_param_len;
>  
>  		addr_buf += af->sockaddr_len;
> +		if (asoc->asconf_addr_del_pending && !del_pickup) {
> +			if (!sctp_in_scope(asoc->asconf_addr_del_pending,
> +			    sctp_scope(addr)))
> +				continue;
> +			/* reuse the parameter length from the same scope one */
> +			totallen += paramlen;
> +			totallen += addr_param_len;
> +			del_pickup = 1;
> +			asoc->src_out_of_asoc_ok = 1;
> +			SCTP_DEBUG_PRINTK("mkasconf_update_ip: picked same-scope del_pending addr, totallen for all addresses is %d\n", totallen);
> +		}
>  	}
>  
>  	/* Create an asconf chunk with the required length. */
> @@ -2778,6 +2795,19 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
>  
>  		addr_buf += af->sockaddr_len;
>  	}
> +	if (flags == SCTP_PARAM_ADD_IP && del_pickup) {
> +		addr = asoc->asconf_addr_del_pending;
> +		del_af = sctp_get_af_specific(addr->v4.sin_family);
> +		del_addr_param_len = del_af->to_addr_param(addr,
> +		    &del_addr_param);
> +		del_param.param_hdr.type = SCTP_PARAM_DEL_IP;
> +		del_param.param_hdr.length = htons(del_paramlen +
> +		    del_addr_param_len);
> +		del_param.crr_id = i;
> +
> +		sctp_addto_chunk(retval, del_paramlen, &del_param);
> +		sctp_addto_chunk(retval, del_addr_param_len, &del_addr_param);
> +	}
>  	return retval;
>  }
>  
> @@ -3193,7 +3223,8 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
>  		local_bh_enable();
>  		list_for_each_entry(transport, &asoc->peer.transport_addr_list,
>  				transports) {
> -			if (transport->state == SCTP_ACTIVE)
> +			if (transport->state == SCTP_ACTIVE &&
> +			    !asoc->src_out_of_asoc_ok)
>  				continue;
>  			dst_release(transport->dst);
>  			sctp_transport_route(transport, NULL,
> @@ -3203,6 +3234,11 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
>  	case SCTP_PARAM_DEL_IP:
>  		local_bh_disable();
>  		sctp_del_bind_addr(bp, &addr);
> +		if (asoc->asconf_addr_del_pending != NULL &&
> +		    sctp_cmp_addr_exact(asoc->asconf_addr_del_pending, &addr)) {
> +			kfree(asoc->asconf_addr_del_pending);
> +			asoc->asconf_addr_del_pending = NULL;
> +		}
>  		local_bh_enable();
>  		list_for_each_entry(transport, &asoc->peer.transport_addr_list,
>  				transports) {
> @@ -3361,6 +3397,9 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
>  		asconf_len -= length;
>  	}
>  
> +	if (no_err && asoc->src_out_of_asoc_ok)
> +		asoc->src_out_of_asoc_ok = 0;
> +
>  	/* Free the cached last sent asconf chunk. */
>  	list_del_init(&asconf->transmitted_list);
>  	sctp_chunk_free(asconf);
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 3951a10..481293d 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -583,10 +583,6 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
>  			goto out;
>  		}
>  
> -		retval = sctp_send_asconf(asoc, chunk);
> -		if (retval)
> -			goto out;
> -
>  		/* Add the new addresses to the bind address list with
>  		 * use_as_src set to 0.
>  		 */
> @@ -599,6 +595,23 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
>  						    SCTP_ADDR_NEW, GFP_ATOMIC);
>  			addr_buf += af->sockaddr_len;
>  		}
> +		if (asoc->src_out_of_asoc_ok) {
> +			struct sctp_transport *trans;
> +
> +			list_for_each_entry(trans,
> +			    &asoc->peer.transport_addr_list, transports) {
> +				/* Clear the source and route cache */
> +				dst_release(trans->dst);
> +				trans->cwnd = min(4*asoc->pathmtu, max_t(__u32,
> +				    2*asoc->pathmtu, 4380));
> +				trans->ssthresh = asoc->peer.i.a_rwnd;
> +				trans->rto = asoc->rto_initial;
> +				trans->rtt = trans->srtt = trans->rttvar = 0;
> +				sctp_transport_route(trans, NULL,
> +				    sctp_sk(asoc->base.sk));
> +			}
> +		}
> +		retval = sctp_send_asconf(asoc, chunk);
>  	}
>  
>  out:
> @@ -711,7 +724,9 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
>  	struct sctp_sockaddr_entry *saddr;
>  	int 			i;
>  	int 			retval = 0;
> +	int			stored = 0;
>  
> +	chunk = NULL;
>  	if (!sctp_addip_enable)
>  		return retval;
>  
> @@ -762,8 +777,32 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
>  		bp = &asoc->base.bind_addr;
>  		laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs,
>  					       addrcnt, sp);
> -		if (!laddr)
> -			continue;
> +		if ((laddr == NULL) && (addrcnt == 1)) {
> +			if (asoc->asconf_addr_del_pending)
> +				continue;
> +			asoc->asconf_addr_del_pending =
> +			    kzalloc(sizeof(union sctp_addr), GFP_ATOMIC);
> +			asoc->asconf_addr_del_pending->sa.sa_family =
> +				    addrs->sa_family;
> +			asoc->asconf_addr_del_pending->v4.sin_port =
> +				    htons(bp->port);
> +			if (addrs->sa_family == AF_INET) {
> +				struct sockaddr_in *sin;
> +
> +				sin = (struct sockaddr_in *)addrs;
> +				asoc->asconf_addr_del_pending->v4.sin_addr.s_addr = sin->sin_addr.s_addr;
> +			} else if (addrs->sa_family == AF_INET6) {
> +				struct sockaddr_in6 *sin6;
> +
> +				sin6 = (struct sockaddr_in6 *)addrs;
> +				ipv6_addr_copy(&asoc->asconf_addr_del_pending->v6.sin6_addr, &sin6->sin6_addr);
> +			}
> +			SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ",
> +			    " at %p\n", asoc, asoc->asconf_addr_del_pending,
> +			    asoc->asconf_addr_del_pending);
> +			stored = 1;
> +			goto skip_mkasconf;
> +		}
>  
>  		/* We do not need RCU protection throughout this loop
>  		 * because this is done under a socket lock from the
> @@ -776,6 +815,7 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
>  			goto out;
>  		}
>  
> +skip_mkasconf:
>  		/* Reset use_as_src flag for the addresses in the bind address
>  		 * list that are to be deleted.
>  		 */
> @@ -801,6 +841,9 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
>  					     sctp_sk(asoc->base.sk));
>  		}
>  
> +		if (stored)
> +			/* We don't need to transmit ASCONF */
> +			continue;
>  		retval = sctp_send_asconf(asoc, chunk);
>  	}
>  out:
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: how to set vlan filter for intel 82599
From: zhou rui @ 2011-04-26  3:39 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: netdev
In-Reply-To: <1303786638.3032.307.camel@localhost>

On Tue, Apr 26, 2011 at 10:57 AM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
> On Tue, 2011-04-26 at 10:19 +0800, zhou rui wrote:
>> hi
>> here is the problem troubles me,how to set vlan filter for intel
>> 82599? for example
>> I want vlan id 0~31 will go to queue 0, vlan id 32-63 will go to queue
>> 1...below is my setting,but doesn't work
>>
>> don't know the exact meanning of the vlan-mask and vlan,how are they calculated?
>>
>> ./ethtool -K eth5 ntuple on
>>
>> ./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
>> dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
>> dst-port-mask 0x0 vlan 0x0000 vlan-mask 0x00E0 user-def 0x0
>> user-def-mask 0x0 action 0
> [...]
>
> This specifies a filter for UDP/IPv4 packets, and the masks are wrong.
> If you actually wanted to filter only UDP/IPv4 packets for VID 0-31 then
> the correct syntax would be:
>
>    ethtool -U eth5 flow-type udp4 vlan 0 vlan-mask 0xf01f
>
> If you don't care about the layer 3/4 protocols then you would need to
> use 'flow-type ether', but no driver implements that yet.  (Well, sfc
> implements the *type*, but not filtering by VID only.)
>
> Ben.
>
> --
> Ben Hutchings, Senior Software Engineer, Solarflare
> Not speaking for my employer; that's the marketing department's job.
> They asked us to note that Solarflare product names are trademarked.
>
>

Hi Ben, thanks for your help. Would you mind telling me the filter for VIDs "32~63"?
I still cannot understand the vlan-mask.

^ permalink raw reply

* [PATCH net-next-2.6 0/7] SCTP updates for net-next-2.6
From: Wei Yongjun @ 2011-04-26  3:44 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org, lksctp

Hi David

Here is a set of SCTP patches for net-next-2.6, the last part
from Vlad's lksctp-dev tree. They update SCTP IPv6 routing and fix
IPsec issues. Please apply.

Vlad Yasevich (4):
      sctp: cache the ipv6 source after route lookup
      sctp: make sctp over IPv6 work with IPsec
      sctp: remove useless arguments from get_saddr() call
      sctp: clean up route lookup calls

Wei Yongjun (2):
      sctp: clean up IPv6 route and XFRM lookups
      sctp: fix IPv6 source address output routing with IPsec

Weixing Shi (1):
      sctp: fix sctp to work with ipv6 source address routing

 include/net/sctp/structs.h |   17 ++--
 net/sctp/ipv6.c            |  186 ++++++++++++++++++++++++++-----------------
 net/sctp/protocol.c        |   57 ++++++-------
 net/sctp/socket.c          |    2 +-
 net/sctp/transport.c       |   28 ++++---
 5 files changed, 162 insertions(+), 128 deletions(-)



^ permalink raw reply

* [PATCH net-next-2.6 1/7] sctp: fix sctp to work with ipv6 source address routing
From: Wei Yongjun @ 2011-04-26  3:45 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org, lksctp
In-Reply-To: <4DB63F85.2090609@cn.fujitsu.com>

From: Weixing Shi <Weixing.Shi@windriver.com>

In the test case below, which uses source address routing,
SCTP does not work.
Node-A
1)ifconfig eth0 inet6 add 2001:1::1/64
2)ip -6 rule add from 2001:1::1 table 100 pref 100
3)ip -6 route add 2001:2::1 dev eth0 table 100
4)sctp_darn -H 2001:1::1 -P 250 -l &
Node-B
1)ifconfig eth0 inet6 add 2001:2::1/64
2)ip -6 rule add from 2001:2::1 table 100 pref 100
3)ip -6 route add 2001:1::1 dev eth0 table 100
4)sctp_darn -H 2001:2::1 -P 250 -h 2001:1::1 -p 250 -s

root cause:
Node-A and Node-B use source address routing, and
at the beginning the source address will be NULL, so sctp will
search the routing table by the destination address only;
because source address routing is used,
the resulting dst_entry will be NULL.

solution:
walk through the bind address list to get the source
address and then lookup the routing table again to get
the correct dst_entry.

Signed-off-by: Weixing Shi <Weixing.Shi@windriver.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
---
 net/sctp/ipv6.c |   47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 321f175..5adf585 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -80,6 +80,9 @@
 
 #include <asm/uaccess.h>
 
+static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
+					 union sctp_addr *s2);
+
 /* Event handler for inet6 address addition/deletion events.
  * The sctp_local_addr_list needs to be protocted by a spin lock since
  * multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -244,8 +247,14 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 					 union sctp_addr *daddr,
 					 union sctp_addr *saddr)
 {
-	struct dst_entry *dst;
+	struct dst_entry *dst = NULL;
 	struct flowi6 fl6;
+	struct sctp_bind_addr *bp;
+	struct sctp_sockaddr_entry *laddr;
+	union sctp_addr *baddr = NULL;
+	__u8 matchlen = 0;
+	__u8 bmatchlen;
+	sctp_scope_t scope;
 
 	memset(&fl6, 0, sizeof(fl6));
 	ipv6_addr_copy(&fl6.daddr, &daddr->v6.sin6_addr);
@@ -261,6 +270,39 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 	}
 
 	dst = ip6_route_output(&init_net, NULL, &fl6);
+	if (!asoc || saddr)
+		goto out;
+
+	if (dst->error) {
+		dst_release(dst);
+		dst = NULL;
+		bp = &asoc->base.bind_addr;
+		scope = sctp_scope(daddr);
+		/* Walk through the bind address list and try to get a dst that
+		 * matches a bind address as the source address.
+		 */
+		rcu_read_lock();
+		list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+			if (!laddr->valid)
+				continue;
+			if ((laddr->state == SCTP_ADDR_SRC) &&
+			    (laddr->a.sa.sa_family == AF_INET6) &&
+			    (scope <= sctp_scope(&laddr->a))) {
+				bmatchlen = sctp_v6_addr_match_len(daddr,
+								   &laddr->a);
+				if (!baddr || (matchlen < bmatchlen)) {
+					baddr = &laddr->a;
+					matchlen = bmatchlen;
+				}
+			}
+		}
+		rcu_read_unlock();
+		if (baddr) {
+			ipv6_addr_copy(&fl6.saddr, &baddr->v6.sin6_addr);
+			dst = ip6_route_output(&init_net, NULL, &fl6);
+		}
+	}
+out:
 	if (!dst->error) {
 		struct rt6_info *rt;
 		rt = (struct rt6_info *)dst;
@@ -269,7 +311,8 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 		return dst;
 	}
 	SCTP_DEBUG_PRINTK("NO ROUTE\n");
-	dst_release(dst);
+	if (dst)
+		dst_release(dst);
 	return NULL;
 }
 
-- 
1.6.5.2



^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox