Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 1/6] qlge: Fixed packet transmit errors due to potential driver errors.
From: Jitendra Kalsaria @ 2012-06-29 18:24 UTC (permalink / raw)
  To: davem
  Cc: netdev, Ron, "Mercer <ron.mercer",
	Dept_NX_Linux_NIC_Driver, Jitendra Kalsaria
In-Reply-To: <1340994290-28832-1-git-send-email-jitendra.kalsaria@qlogic.com>

From: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>

The qlge driver was wrongly counting a full TX ring as a TX
error. A full TX ring is expected behavior when the NIC is
overwhelmed and is not an error, as long as packets are not
lost.

Signed-off-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlge/qlge_main.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 09d8d33..cdbc860 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2562,7 +2562,6 @@ static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev)
 			   __func__, tx_ring_idx);
 		netif_stop_subqueue(ndev, tx_ring->wq_id);
 		atomic_inc(&tx_ring->queue_stopped);
-		tx_ring->tx_errors++;
 		return NETDEV_TX_BUSY;
 	}
 	tx_ring_desc = &tx_ring->q[tx_ring->prod_idx];
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 0/6] qlge: bug fix
From: Jitendra Kalsaria @ 2012-06-29 18:24 UTC (permalink / raw)
  To: davem
  Cc: netdev, Ron, "Mercer <ron.mercer",
	Dept_NX_Linux_NIC_Driver, Jitendra Kalsaria

From: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>

Please apply it to net-next.

Thanks,
Jitendra

^ permalink raw reply

* [PATCH net-next 5/6] qlge: Categorize receive frame errors from firmware.
From: Jitendra Kalsaria @ 2012-06-29 18:24 UTC (permalink / raw)
  To: davem
  Cc: netdev, Ron, "Mercer <ron.mercer",
	Dept_NX_Linux_NIC_Driver, Jitendra Kalsaria
In-Reply-To: <1340994290-28832-1-git-send-email-jitendra.kalsaria@qlogic.com>

From: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>

Signed-off-by: Sritej Velaga <sritej.velaga@qlogic.com>
Signed-off-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlge/qlge.h         |    8 ++++
 drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c |   14 +++++++
 drivers/net/ethernet/qlogic/qlge/qlge_main.c    |   46 ++++++++++++++++++----
 3 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index 5a639df..e81bbb7 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -1535,6 +1535,14 @@ struct nic_stats {
 	u64 rx_1024_to_1518_pkts;
 	u64 rx_1519_to_max_pkts;
 	u64 rx_len_err_pkts;
+	/* Receive Mac Err stats */
+	u64 rx_code_err;
+	u64 rx_oversize_err;
+	u64 rx_undersize_err;
+	u64 rx_preamble_err;
+	u64 rx_frame_len_err;
+	u64 rx_crc_err;
+	u64 rx_err_count;
 	/*
 	 * These stats come from offset 500h to 5C8h
 	 * in the XGMAC register.
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
index 966bd96..bbc4136 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
@@ -226,6 +226,13 @@ static char ql_stats_str_arr[][ETH_GSTRING_LEN] = {
 	{"rx_1024_to_1518_pkts"},
 	{"rx_1519_to_max_pkts"},
 	{"rx_len_err_pkts"},
+	{"rx_code_err"},
+	{"rx_oversize_err"},
+	{"rx_undersize_err"},
+	{"rx_preamble_err"},
+	{"rx_frame_len_err"},
+	{"rx_crc_err"},
+	{"rx_err_count"},
 	{"tx_cbfc_pause_frames0"},
 	{"tx_cbfc_pause_frames1"},
 	{"tx_cbfc_pause_frames2"},
@@ -320,6 +327,13 @@ ql_get_ethtool_stats(struct net_device *ndev,
 	*data++ = s->rx_1024_to_1518_pkts;
 	*data++ = s->rx_1519_to_max_pkts;
 	*data++ = s->rx_len_err_pkts;
+	*data++ = s->rx_code_err;
+	*data++ = s->rx_oversize_err;
+	*data++ = s->rx_undersize_err;
+	*data++ = s->rx_preamble_err;
+	*data++ = s->rx_frame_len_err;
+	*data++ = s->rx_crc_err;
+	*data++ = s->rx_err_count;
 	*data++ = s->tx_cbfc_pause_frames0;
 	*data++ = s->tx_cbfc_pause_frames1;
 	*data++ = s->tx_cbfc_pause_frames2;
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index aa514c5..0f56148 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -1433,6 +1433,34 @@ map_error:
 	return NETDEV_TX_BUSY;
 }
 
+/* Categorizing receive firmware frame errors */
+static void ql_categorize_rx_err(struct ql_adapter *qdev, u8 rx_err)
+{
+	qdev->nic_stats.rx_err_count++;
+
+	switch (rx_err & IB_MAC_IOCB_RSP_ERR_MASK) {
+	case IB_MAC_IOCB_RSP_ERR_CODE_ERR:
+		qdev->nic_stats.rx_code_err++;
+		break;
+	case IB_MAC_IOCB_RSP_ERR_OVERSIZE:
+		qdev->nic_stats.rx_oversize_err++;
+		break;
+	case IB_MAC_IOCB_RSP_ERR_UNDERSIZE:
+		qdev->nic_stats.rx_undersize_err++;
+		break;
+	case IB_MAC_IOCB_RSP_ERR_PREAMBLE:
+		qdev->nic_stats.rx_preamble_err++;
+		break;
+	case IB_MAC_IOCB_RSP_ERR_FRAME_LEN:
+		qdev->nic_stats.rx_frame_len_err++;
+		break;
+	case IB_MAC_IOCB_RSP_ERR_CRC:
+		qdev->nic_stats.rx_crc_err++;
+	default:
+		break;
+	}
+}
+
 /* Process an inbound completion from an rx ring. */
 static void ql_process_mac_rx_gro_page(struct ql_adapter *qdev,
 					struct rx_ring *rx_ring,
@@ -1446,6 +1474,12 @@ static void ql_process_mac_rx_gro_page(struct ql_adapter *qdev,
 
 	napi->dev = qdev->ndev;
 
+	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
+		ql_categorize_rx_err(qdev, ib_mac_rsp->flags2);
+		put_page(lbq_desc->p.pg_chunk.page);
+		return;
+	}
+
 	skb = napi_get_frags(napi);
 	if (!skb) {
 		netif_err(qdev, drv, qdev->ndev,
@@ -1502,9 +1536,7 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev,
 
 	/* Frame error, so drop the packet. */
 	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
-		netif_info(qdev, drv, qdev->ndev,
-			  "Receive error, flags2 = 0x%x\n", ib_mac_rsp->flags2);
-		rx_ring->rx_errors++;
+		ql_categorize_rx_err(qdev, ib_mac_rsp->flags2);
 		goto err_out;
 	}
 
@@ -1595,10 +1627,8 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev,
 
 	/* Frame error, so drop the packet. */
 	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
-		netif_info(qdev, drv, qdev->ndev,
-			  "Receive error, flags2 = 0x%x\n", ib_mac_rsp->flags2);
+		ql_categorize_rx_err(qdev, ib_mac_rsp->flags2);
 		dev_kfree_skb_any(skb);
-		rx_ring->rx_errors++;
 		return;
 	}
 
@@ -1910,10 +1940,8 @@ static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev,
 
 	/* Frame error, so drop the packet. */
 	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
-		netif_info(qdev, drv, qdev->ndev,
-			  "Receive error, flags2 = 0x%x\n", ib_mac_rsp->flags2);
+		ql_categorize_rx_err(qdev, ib_mac_rsp->flags2);
 		dev_kfree_skb_any(skb);
-		rx_ring->rx_errors++;
 		return;
 	}
 
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 6/6] qlge: Bumped driver version to 1.00.00.31
From: Jitendra Kalsaria @ 2012-06-29 18:24 UTC (permalink / raw)
  To: davem
  Cc: netdev, Ron, "Mercer <ron.mercer",
	Dept_NX_Linux_NIC_Driver, Jitendra Kalsaria
In-Reply-To: <1340994290-28832-1-git-send-email-jitendra.kalsaria@qlogic.com>

From: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>

Signed-off-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlge/qlge.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index e81bbb7..5a8c00c 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -18,7 +18,7 @@
  */
 #define DRV_NAME  	"qlge"
 #define DRV_STRING 	"QLogic 10 Gigabit PCI-E Ethernet Driver "
-#define DRV_VERSION	"v1.00.00.30.00.00-01"
+#define DRV_VERSION	"v1.00.00.31"
 
 #define WQ_ADDR_ALIGN	0x3	/* 4 byte alignment */
 
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 4/6] qlge: Fixed double pci free upon tx_ring->q allocation failure.
From: Jitendra Kalsaria @ 2012-06-29 18:24 UTC (permalink / raw)
  To: davem
  Cc: netdev, Ron, "Mercer <ron.mercer",
	Dept_NX_Linux_NIC_Driver, Jitendra Kalsaria
In-Reply-To: <1340994290-28832-1-git-send-email-jitendra.kalsaria@qlogic.com>

From: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>

Signed-off-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlge/qlge_main.c |   14 ++++++++------
 1 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index cdbc860..aa514c5 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2701,11 +2701,9 @@ static int ql_alloc_tx_resources(struct ql_adapter *qdev,
 	    pci_alloc_consistent(qdev->pdev, tx_ring->wq_size,
 				 &tx_ring->wq_base_dma);
 
-	if ((tx_ring->wq_base == NULL) ||
-	    tx_ring->wq_base_dma & WQ_ADDR_ALIGN) {
-		netif_err(qdev, ifup, qdev->ndev, "tx_ring alloc failed.\n");
-		return -ENOMEM;
-	}
+	if ((tx_ring->wq_base == NULL) || tx_ring->wq_base_dma & WQ_ADDR_ALIGN)
+		goto err;
+
 	tx_ring->q =
 	    kmalloc(tx_ring->wq_len * sizeof(struct tx_ring_desc), GFP_KERNEL);
 	if (tx_ring->q == NULL)
@@ -2713,8 +2711,12 @@ static int ql_alloc_tx_resources(struct ql_adapter *qdev,
 
 	return 0;
 err:
-	pci_free_consistent(qdev->pdev, tx_ring->wq_size,
+	if (tx_ring->wq_base) {
+		pci_free_consistent(qdev->pdev, tx_ring->wq_size,
 			    tx_ring->wq_base, tx_ring->wq_base_dma);
+		tx_ring->wq_base = NULL;
+	}
+	netif_err(qdev, ifup, qdev->ndev, "tx_ring alloc failed.\n");
 	return -ENOMEM;
 }
 
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 3/6] qlge: Garbage values shown in extra info during selftest.
From: Jitendra Kalsaria @ 2012-06-29 18:24 UTC (permalink / raw)
  To: davem
  Cc: netdev, Ron, "Mercer <ron.mercer",
	Dept_NX_Linux_NIC_Driver, Jitendra Kalsaria
In-Reply-To: <1340994290-28832-1-git-send-email-jitendra.kalsaria@qlogic.com>

From: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>

Running the selftest ('ethtool -t') multiple times showed different
values in the 'extra info' section each time; these values were garbage.

Signed-off-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
index 81672f5..966bd96 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
@@ -248,6 +248,9 @@ static char ql_stats_str_arr[][ETH_GSTRING_LEN] = {
 static void ql_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
 	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(buf, *ql_gstrings_test, QLGE_TEST_LEN * ETH_GSTRING_LEN);
+		break;
 	case ETH_SS_STATS:
 		memcpy(buf, ql_stats_str_arr, sizeof(ql_stats_str_arr));
 		break;
@@ -539,6 +542,8 @@ static void ql_self_test(struct net_device *ndev,
 {
 	struct ql_adapter *qdev = netdev_priv(ndev);
 
+	memset(data, 0, sizeof(u64) * QLGE_TEST_LEN);
+
 	if (netif_running(ndev)) {
 		set_bit(QL_SELFTEST, &qdev->flags);
 		if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 2/6] qlge: Stand-up card should not report supporting wol.
From: Jitendra Kalsaria @ 2012-06-29 18:24 UTC (permalink / raw)
  To: davem
  Cc: netdev, Ron, "Mercer <ron.mercer",
	Dept_NX_Linux_NIC_Driver, Jitendra Kalsaria
In-Reply-To: <1340994290-28832-1-git-send-email-jitendra.kalsaria@qlogic.com>

From: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>

Signed-off-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c |   43 ++++++++++++++--------
 1 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
index 8e2c2a7..81672f5 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
@@ -388,10 +388,14 @@ static void ql_get_drvinfo(struct net_device *ndev,
 static void ql_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
 	struct ql_adapter *qdev = netdev_priv(ndev);
-	/* What we support. */
-	wol->supported = WAKE_MAGIC;
-	/* What we've currently got set. */
-	wol->wolopts = qdev->wol;
+
+	if (qdev->pdev->subsystem_device == 0x0068 ||
+			qdev->pdev->subsystem_device == 0x0180) {
+		/* What we support. */
+		wol->supported = WAKE_MAGIC;
+		/* What we've currently got set. */
+		wol->wolopts = qdev->wol;
+	}
 }
 
 static int ql_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
@@ -399,19 +403,26 @@ static int ql_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 	struct ql_adapter *qdev = netdev_priv(ndev);
 	int status;
 
-	if (wol->wolopts & ~WAKE_MAGIC)
-		return -EINVAL;
-	qdev->wol = wol->wolopts;
-
-	netif_info(qdev, drv, qdev->ndev, "Set wol option 0x%x\n", qdev->wol);
-	if (!qdev->wol) {
-		u32 wol = 0;
-		status = ql_mb_wol_mode(qdev, wol);
-		netif_err(qdev, drv, qdev->ndev, "WOL %s (wol code 0x%x)\n",
-			  status == 0 ? "cleared successfully" : "clear failed",
-			  wol);
+	if (qdev->pdev->subsystem_device == 0x0068 ||
+			qdev->pdev->subsystem_device == 0x0180) {
+		if (wol->wolopts & ~WAKE_MAGIC)
+			return -EINVAL;
+		qdev->wol = wol->wolopts;
+
+		netif_info(qdev, drv, qdev->ndev,
+				"Set wol option 0x%x\n", qdev->wol);
+		if (!qdev->wol) {
+			u32 wol = 0;
+			status = ql_mb_wol_mode(qdev, wol);
+			netif_err(qdev, drv, qdev->ndev,
+			"WOL %s (wol code 0x%x)\n",
+			status == 0 ? "cleared successfully" : "clear failed",
+			wol);
+		}
+	} else {
+		netif_info(qdev, drv, qdev->ndev,
+				"WOL is not supported on stand-up card\n");
 	}
-
 	return 0;
 }
 
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH v3] sctp: be more restrictive in transport selection on bundled sacks
From: Vlad Yasevich @ 2012-06-29 18:29 UTC (permalink / raw)
  To: Neil Horman; +Cc: netdev, David S. Miller
In-Reply-To: <1340987696-19205-1-git-send-email-nhorman@tuxdriver.com>

On 06/29/2012 12:34 PM, Neil Horman wrote:
> It was noticed recently that when we send data on a transport, it's possible that
> we might bundle a sack that arrived on a different transport.  While this isn't
> a major problem, it does go against the SHOULD requirement in section 6.4 of RFC
> 2960:
>
>   An endpoint SHOULD transmit reply chunks (e.g., SACK, HEARTBEAT ACK,
>     etc.) to the same destination transport address from which it
>     received the DATA or control chunk to which it is replying.  This
>     rule should also be followed if the endpoint is bundling DATA chunks
>     together with the reply chunk.
>
> This patch seeks to correct that.  It restricts the bundling of sack operations
> to only those transports which have moved the ctsn of the association forward
> since the last sack.  By doing this we guarantee that we only bundle outbound
> sacks on a transport that has received a chunk since the last sack.  This brings
> us into stricter compliance with the RFC.
>
> Vlad had initially suggested that we strictly allow only sack bundling on the
> transport that last moved the ctsn forward.  While this makes sense, I was
> concerned that doing so prevented us from bundling in the case where we had
> received chunks that moved the ctsn on multiple transports.  In those cases, the
> RFC allows us to select any of the transports having received chunks to bundle
> the sack on.  so I've modified the approach to allow for that, by adding a state
> variable to each transport that tracks whether it has moved the ctsn since the
> last sack.  This I think keeps our behavior (and performance), close enough to
> our current profile that I think we can do this without a sysctl knob to
> enable/disable it.
>
> Signed-off-by: Neil Horman<nhorman@tuxdriver.com>
> CC: Vlad Yaseivch<vyasevich@gmail.com>
> CC: David S. Miller<davem@davemloft.net>
> Reported-by: Michele Baldessari<michele@redhat.com>
> Reported-by: sorin serban<sserban@redhat.com>
>
> ---
> Change Notes:
> V2)
> 	* Removed unused variable as per Dave M. Request
> 	* Delayed rwnd adjustment until we are sure we will sack (Vlad Y.)
> V3)
> 	* Switched test to use pkt->transport rather than chunk->transport
> 	* Modified detection of sack-able transport.  Instead of just setting
> 	  and clearing a flag, we now mark each transport and association with
> 	  a sack generation tag.  We increment the associations generation on
> 	  every sack, and assign that generation tag to every transport that
> 	  updates the ctsn.  This prevents us from having to iterate over a for
> 	  loop on every sack, which is much more scalable.
> ---
>   include/net/sctp/structs.h |    4 ++++
>   include/net/sctp/tsnmap.h  |    3 ++-
>   net/sctp/associola.c       |    1 +
>   net/sctp/output.c          |    9 +++++++--
>   net/sctp/sm_make_chunk.c   |   10 ++++++++++
>   net/sctp/sm_sideeffect.c   |    2 +-
>   net/sctp/transport.c       |    2 ++
>   net/sctp/tsnmap.c          |    6 +++++-
>   net/sctp/ulpevent.c        |    3 ++-
>   net/sctp/ulpqueue.c        |    2 +-
>   10 files changed, 35 insertions(+), 7 deletions(-)
>
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index e4652fe..fecdf31 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -912,6 +912,9 @@ struct sctp_transport {
>   		/* Is this structure kfree()able? */
>   		malloced:1;
>
> +	/* Has this transport moved the ctsn since we last sacked */
> +	__u32 sack_generation;
> +
>   	struct flowi fl;
>
>   	/* This is the peer's IP address and port. */
> @@ -1584,6 +1587,7 @@ struct sctp_association {
>   		 */
>   		__u8    sack_needed;     /* Do we need to sack the peer? */
>   		__u32	sack_cnt;
> +		__u32	sack_generation;
>
>   		/* These are capabilities which our peer advertised.  */
>   		__u8	ecn_capable:1,	    /* Can peer do ECN? */
> diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h
> index e7728bc..2c5d2b4 100644
> --- a/include/net/sctp/tsnmap.h
> +++ b/include/net/sctp/tsnmap.h
> @@ -117,7 +117,8 @@ void sctp_tsnmap_free(struct sctp_tsnmap *map);
>   int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn);
>
>   /* Mark this TSN as seen.  */
> -int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn);
> +int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn,
> +		     struct sctp_transport *trans);
>
>   /* Mark this TSN and all lower as seen. */
>   void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn);
> diff --git a/net/sctp/associola.c b/net/sctp/associola.c
> index 5bc9ab1..6c66adb 100644
> --- a/net/sctp/associola.c
> +++ b/net/sctp/associola.c
> @@ -271,6 +271,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
>   	 */
>   	asoc->peer.sack_needed = 1;
>   	asoc->peer.sack_cnt = 0;
> +	asoc->peer.sack_generation=0;
>
>   	/* Assume that the peer will tell us if he recognizes ASCONF
>   	 * as part of INIT exchange.
> diff --git a/net/sctp/output.c b/net/sctp/output.c
> index f1b7d4b..0de6cd5 100644
> --- a/net/sctp/output.c
> +++ b/net/sctp/output.c
> @@ -240,14 +240,19 @@ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
>   	 */
>   	if (sctp_chunk_is_data(chunk)&&  !pkt->has_sack&&
>   	!pkt->has_cookie_echo) {
> -		struct sctp_association *asoc;
>   		struct timer_list *timer;
> -		asoc = pkt->transport->asoc;
> +		struct sctp_association *asoc = pkt->transport->asoc;
> +
>   		timer =&asoc->timers[SCTP_EVENT_TIMEOUT_SACK];
>
>   		/* If the SACK timer is running, we have a pending SACK */
>   		if (timer_pending(timer)) {
>   			struct sctp_chunk *sack;
> +
> +			if (pkt->transport->sack_generation !=
> +			    pkt->transport->asoc->peer.sack_generation)
> +				return retval;
> +
>   			asoc->a_rwnd = asoc->rwnd;
>   			sack = sctp_make_sack(asoc);
>   			if (sack) {
> diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
> index a85eeeb..ffa2a8e 100644
> --- a/net/sctp/sm_make_chunk.c
> +++ b/net/sctp/sm_make_chunk.c
> @@ -736,6 +736,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
>   	__u16 num_gabs, num_dup_tsns;
>   	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
>   	struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
> +	struct sctp_transport *trans;
>
>   	memset(gabs, 0, sizeof(gabs));
>   	ctsn = sctp_tsnmap_get_ctsn(map);
> @@ -805,6 +806,15 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
>   		sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns,
>   				 sctp_tsnmap_get_dups(map));
>
> +	/*
> +	 * Once we have a sack generated, clear the moved_tsn information
> +	 * from all the transports
> +	 */
> +	if (!asoc->peer.sack_generation)
> +		list_for_each_entry(trans,&asoc->peer.transport_addr_list,
> +				    transports)
> +			trans->sack_generation = UINT_MAX;
> +	((struct sctp_association *)asoc)->peer.sack_generation++;

Two points here:
1) The comment no longer matches the code
2) Why special-case peer.sack_generation == 0 and set the transport's
sack_generation to UINT_MAX?

-vlad

>   nodata:
>   	return retval;
>   }
> diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
> index c96d1a8..8716da1 100644
> --- a/net/sctp/sm_sideeffect.c
> +++ b/net/sctp/sm_sideeffect.c
> @@ -1268,7 +1268,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
>   		case SCTP_CMD_REPORT_TSN:
>   			/* Record the arrival of a TSN.  */
>   			error = sctp_tsnmap_mark(&asoc->peer.tsn_map,
> -						 cmd->obj.u32);
> +						 cmd->obj.u32, NULL);
>   			break;
>
>   		case SCTP_CMD_REPORT_FWDTSN:
> diff --git a/net/sctp/transport.c b/net/sctp/transport.c
> index b026ba0..1dcceb6 100644
> --- a/net/sctp/transport.c
> +++ b/net/sctp/transport.c
> @@ -68,6 +68,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
>   	peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
>   	memset(&peer->saddr, 0, sizeof(union sctp_addr));
>
> +	peer->sack_generation = 0;
> +
>   	/* From 6.3.1 RTO Calculation:
>   	 *
>   	 * C1) Until an RTT measurement has been made for a packet sent to the
> diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
> index f1e40ceb..b5fb7c4 100644
> --- a/net/sctp/tsnmap.c
> +++ b/net/sctp/tsnmap.c
> @@ -114,7 +114,8 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn)
>
>
>   /* Mark this TSN as seen.  */
> -int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn)
> +int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn,
> +		     struct sctp_transport *trans)
>   {
>   	u16 gap;
>
> @@ -133,6 +134,9 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn)
>   		 */
>   		map->max_tsn_seen++;
>   		map->cumulative_tsn_ack_point++;
> +		if (trans)
> +			trans->sack_generation =
> +				trans->asoc->peer.sack_generation;
>   		map->base_tsn++;
>   	} else {
>   		/* Either we already have a gap, or about to record a gap, so
> diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
> index 8a84017..33d8947 100644
> --- a/net/sctp/ulpevent.c
> +++ b/net/sctp/ulpevent.c
> @@ -715,7 +715,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
>   	 * can mark it as received so the tsn_map is updated correctly.
>   	 */
>   	if (sctp_tsnmap_mark(&asoc->peer.tsn_map,
> -			     ntohl(chunk->subh.data_hdr->tsn)))
> +			     ntohl(chunk->subh.data_hdr->tsn),
> +			     chunk->transport))
>   		goto fail_mark;
>
>   	/* First calculate the padding, so we don't inadvertently
> diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
> index f2d1de7..f5a6a4f 100644
> --- a/net/sctp/ulpqueue.c
> +++ b/net/sctp/ulpqueue.c
> @@ -1051,7 +1051,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
>   	if (chunk&&  (freed>= needed)) {
>   		__u32 tsn;
>   		tsn = ntohl(chunk->subh.data_hdr->tsn);
> -		sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn);
> +		sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport);
>   		sctp_ulpq_tail_data(ulpq, chunk, gfp);
>
>   		sctp_ulpq_partial_delivery(ulpq, chunk, gfp);

^ permalink raw reply

* Re: [PATCH 1/5] netfilter: ipset: fix interface comparision in hash-netiface sets
From: Florian Westphal @ 2012-06-29 18:24 UTC (permalink / raw)
  To: David Laight; +Cc: pablo, netfilter-devel, davem, netdev
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B6F6E@saturn3.aculab.com>

David Laight <David.Laight@ACULAB.COM> wrote:
> > From: Florian Westphal <fw@strlen.de>
> > 
> > ifname_compare() assumes that skb->dev is zero-padded,
> > e.g 'eth1\0\0\0\0\0...'. This isn't always the case. e1000 driver does
> > 
> > strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
> > 
> > in e1000_probe(), so once device is registered dev->name memory
> contains
> > 'eth1\0:0:3\0\0\0' (or something like that), which makes eth1 compare
> fail.
> 
> strncpy() would normally zero-fill the destination buffer
> (at least the libc version does).
> 
> So something else must be wrong.

No. The driver's .probe() runs before the device name is filled in, and no
explicit zeroing happens there.

^ permalink raw reply

* Re[2]:  BUG: NULL pointer in ctnetlink_conntrack_event
From: Hans Schillstrom @ 2012-06-29 18:22 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netdev, netfilter-devel

>On Fri, Jun 29, 2012 at 02:29:37PM +0200, Hans Schillstrom wrote:
>> Hello,
>> 
>> There is a "hard to find" problem in ctnetlink_conntrack_event() when calling
>> netlink_has_listeners() net->nfnl is NULL.
>> 
>> The rcu stuff seems to be right at a first look but who knows...
>> 
>> The line below fix the problem, but that is not the root cause.
>> 
>>  int nfnetlink_has_listeners(struct net *net, unsigned int group)
>>  {
>> -       return netlink_has_listeners(net->nfnl, group);
>> +       return net->nfnl ? netlink_has_listeners(net->nfnl, group) : 0 ;
>>  }
>> 
>> Yes it is a 3.0.26 kernel but this patch is applied
>> netfilter: nf_conntrack: make event callback registration per-netns
>
>I think this patch above is missing some rcu_access_pointer usage.
>
>Please, see patch attached.

Thanks it looks like it's the missing patch.

/Hans

^ permalink raw reply

* Re: AF_BUS socket address family
From: Chris Friesen @ 2012-06-29 18:16 UTC (permalink / raw)
  To: Vincent Sanders; +Cc: netdev, linux-kernel, David S. Miller
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

On 06/29/2012 10:45 AM, Vincent Sanders wrote:
> This series adds the bus address family (AF_BUS) it is against
> net-next as of yesterday.
>
> AF_BUS is a message oriented inter process communication system.
>
> The principle features are:
>
>   - Reliable datagram based communication (all sockets are of type
>     SOCK_SEQPACKET)
>
>   - Multicast message delivery (one to many, unicast as a subset)
>
>   - Strict ordering (messages are delivered to every client in the same order)
>
>   - Ability to pass file descriptors
>
>   - Ability to pass credentials
>

I haven't had time to look at the code yet, but if you haven't already 
I'd like to propose adding the ability for someone with suitable 
privileges to eavesdrop on all communications.  We've been using 
something similar to this (essentially a simplified multicast unix 
datagram protocol) for many years now and having a tcpdump-like ability 
is very useful for debugging.

Chris

^ permalink raw reply

* Reporting a Kernel Bug
From: Lucas Willian Bocchi @ 2012-06-29 17:49 UTC (permalink / raw)
  To: netdev

Dear Sir

As instructed, I'm sending the link to this probable bug.


https://bugzilla.kernel.org/show_bug.cgi?id=43901

Thanks in advance.

^ permalink raw reply

* tcp mtu probing oddity - small packets
From: George B. @ 2012-06-29 17:43 UTC (permalink / raw)
  To: netdev

This is mainly meant to put this in the archive in case anyone else
has this problem.  It was a hair-puller.

Setup is a load balancer doing source NAT to a linux server.  Server
had /proc/sys/net/ipv4/tcp_mtu_probing=2

Apparently the PMTU probing somehow gets confused and the flow gets
clamped to the tcp_base_mss value (512 byte packets).  But this PMTU
value is apparently cached for all traffic to the IP address and since
that is a source NAT, ALL flows from that IP address get clamped to
512 byte packets.  So if something goes wonky with one flow, it
impacts all flows because they all appear to come from the same IP
address.

Setting tcp_mtu_probing to 1 has cleared the issue for now but it may
come back from time to time if a flow gets into trouble and whatever
Linux is doing in the PMTU probing causes the flow to be clamped to
tcp_base_mss but assuming that PMTU will age out at some point after
the problem client goes away, things should return to normal behavior.

There's something about how the PMTU probes work that apparently
causes them to fail when activated in this particular configuration and
clamps the packet size to tcp_base_mss for all flows to that IP
address. If you are behind a source NAT, all flows share the same IP
address and so all flows get clamped.

Linux 2.6.38-11

^ permalink raw reply

* Re: [net-next.git 4/4 (v9)] phy: add the EEE support and the way to access to the MMD registers.
From: Ben Hutchings @ 2012-06-29 17:36 UTC (permalink / raw)
  To: Giuseppe CAVALLARO; +Cc: netdev, eric.dumazet, rayagond, davem, yuvalmin
In-Reply-To: <1340867678-18375-5-git-send-email-peppe.cavallaro@st.com>

On Thu, 2012-06-28 at 09:14 +0200, Giuseppe CAVALLARO wrote:
> This patch adds the support for the Energy-Efficient Ethernet (EEE)
> to the Physical Abstraction Layer.
> To support the EEE we have to access to the MMD registers 3.20 and
> 7.60/61. So two new functions have been added to read/write the MMD
> registers (clause 45).
> 
> An Ethernet driver (I tested the stmmac) can invoke the phy_init_eee to properly
> check if the EEE is supported by the PHYs and it can also set the clock
> stop enable bit in the 3.0 register.
> The phy_get_eee_err can be used for reporting the number of times
> the PHY failed to complete its normal wake sequence.
> 
> In the end, this patch also adds the EEE ethtool support implementing:
>  o phy_ethtool_set_eee
>  o phy_ethtool_get_eee
> 
> v1: initial patch
> v2: fixed some errors especially on naming convention
> v3: renamed again the mmd read/write functions thank to Ben's feedback
> v4: moved file to phy.c and added the ethtool support.
> v5: fixed phy_adv_to_eee, phy_eee_to_supported, phy_eee_to_adv return
>     values according to ethtool API (thanks to Ben's feedback).
>     Renamed some macros to avoid too long names.
> v6: fixed kernel-doc comments to be properly parsed.
>     Fixed the phy_init_eee function: we need to check which link mode
>     was autonegotiated and then the corresponding bits in 7.60 and 7.61
>     registers.
> v7: reviewed the way to get the negotiated settings.
> v8: fixed a problem in the phy_init_eee return value erroneously added
>     when included the phy_read_status call.
> v9: do not remove the MDIO_AN_EEE_ADV_100TX and MDIO_AN_EEE_ADV_1000T
>     and fixed the eee_{cap,lp,adv} declaration as "int" instead of u16.
> 
> Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
[...]
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>

(but not tested in any way)

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [RFC] [TCP 1/3] tcp: Add MSG_NEW_PACKET flag to indicate preferable packet boundaries
From: Eric Dumazet @ 2012-06-29 17:15 UTC (permalink / raw)
  To: Andreas Gruenbacher; +Cc: netdev, linux-kernel, Herbert Xu, David S. Miller
In-Reply-To: <1340984335.25450.24.camel@gurkel.linbit>

On Fri, 2012-06-29 at 17:38 +0200, Andreas Gruenbacher wrote:

> The primary use case is fast Gigabit (10 or more) Ethernet connections
> with jumbo frames and switches that support them.  There, frames will go
> through unchanged and you can zero-copy receive all the time.
> 
> Not sure how well the approach scales to other kinds of connections; it
> may work often enough to be worth it.  When things get distorted between
> the sender and the receiver and tcp_recvbio() fails, the data can still
> be copied out of the socket as before.

If you have a packet loss, receiver can and will coalesce frames.

^ permalink raw reply

* Re: [PATCH net-next 13/15] netfilter: nfdbus: Add D-bus message parsing
From: Pablo Neira Ayuso @ 2012-06-29 17:11 UTC (permalink / raw)
  To: Vincent Sanders
  Cc: netdev, linux-kernel, David S. Miller, Javier Martinez Canillas,
	Alban Crequy
In-Reply-To: <1340988354-26981-14-git-send-email-vincent.sanders@collabora.co.uk>

On Fri, Jun 29, 2012 at 05:45:52PM +0100, Vincent Sanders wrote:
> From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
> 
> The netfilter D-Bus module needs to parse D-bus messages sent by
> applications to decide whether a peer can receive or not a D-Bus
> message. Add D-bus message parsing logic to be able to analyze.

Not talking about the entire patchset, only about the part I'm
responsible for.

I don't see why you think this belongs to netfilter at all.

This doesn't integrate into the existing filtering infrastructure,
nor does it extend it in any way.

> Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
> Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
> ---
>  net/netfilter/nfdbus/message.c |  194 ++++++++++++++++++++++++++++++++++++++++
>  net/netfilter/nfdbus/message.h |   71 +++++++++++++++
>  2 files changed, 265 insertions(+)
>  create mode 100644 net/netfilter/nfdbus/message.c
>  create mode 100644 net/netfilter/nfdbus/message.h
> 
> diff --git a/net/netfilter/nfdbus/message.c b/net/netfilter/nfdbus/message.c
> new file mode 100644
> index 0000000..93c409c
> --- /dev/null
> +++ b/net/netfilter/nfdbus/message.c
> @@ -0,0 +1,194 @@
> +/*
> + * message.c  Basic D-Bus message parsing
> + *
> + * Copyright (C) 2010-2012  Collabora Ltd
> + * Authors:	Alban Crequy <alban.crequy@collabora.co.uk>
> + * Copyright (C) 2002, 2003, 2004, 2005  Red Hat Inc.
> + * Copyright (C) 2002, 2003  CodeFactory AB
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + *
> + */
> +
> +#include <linux/slab.h>
> +
> +#include "message.h"
> +
> +int dbus_message_type_from_string(const char *type_str)
> +{
> +	if (strcmp(type_str, "method_call") == 0)
> +		return DBUS_MESSAGE_TYPE_METHOD_CALL;
> +	if (strcmp(type_str, "method_return") == 0)
> +		return DBUS_MESSAGE_TYPE_METHOD_RETURN;
> +	else if (strcmp(type_str, "signal") == 0)
> +		return DBUS_MESSAGE_TYPE_SIGNAL;
> +	else if (strcmp(type_str, "error") == 0)
> +		return DBUS_MESSAGE_TYPE_ERROR;
> +	else
> +		return DBUS_MESSAGE_TYPE_INVALID;
> +}
> +
> +int dbus_message_parse(unsigned char *message, size_t len,
> +		       struct dbus_message *dbus_message)
> +{
> +	unsigned char *cur;
> +	int array_header_len;
> +
> +	dbus_message->message = message;
> +
> +	if (len < 4 + 4 + 4 + 4 || message[1] == 0 || message[1] > 4)
> +		return -EINVAL;
> +
> +	dbus_message->type = message[1];
> +	dbus_message->body_length = *((u32 *)(message + 4));
> +	cur = message + 12;
> +	array_header_len = *(u32 *)cur;
> +	dbus_message->len_offset = 12;
> +	cur += 4;
> +	while (cur < message + len
> +	       && cur < message + 12 + 4 + array_header_len) {
> +		int header_code;
> +		int signature_len;
> +		unsigned char *signature;
> +		int str_len;
> +		unsigned char *str;
> +
> +		/* D-Bus alignment craziness */
> +		if ((cur - message) % 8 != 0)
> +			cur += 8 - (cur - message) % 8;
> +
> +		header_code = *(char *)cur;
> +		cur++;
> +		signature_len = *(char *)cur;
> +		/* All header fields of the current D-Bus spec have a simple
> +		 * type, either o, s, g, or u */
> +		if (signature_len != 1)
> +			return -EINVAL;
> +		cur++;
> +		signature = cur;
> +		cur += signature_len + 1;
> +		if (signature[0] != 'o' &&
> +		    signature[0] != 's' &&
> +		    signature[0] != 'g' &&
> +		    signature[0] != 'u')
> +			return -EINVAL;
> +
> +		if (signature[0] == 'u') {
> +			cur += 4;
> +			continue;
> +		}
> +
> +		if (signature[0] != 'g') {
> +			str_len = *(u32 *)cur;
> +			cur += 4;
> +		} else {
> +			str_len = *(char *)cur;
> +			cur += 1;
> +		}
> +
> +		str = cur;
> +		switch (header_code) {
> +		case 1:
> +			dbus_message->path = str;
> +			break;
> +		case 2:
> +			dbus_message->interface = str;
> +			break;
> +		case 3:
> +			dbus_message->member = str;
> +			break;
> +		case 6:
> +			dbus_message->destination = str;
> +			break;
> +		case 7:
> +			dbus_message->sender = str;
> +			break;
> +		case 8:
> +			dbus_message->body_signature = str;
> +			break;
> +		}
> +		cur += str_len + 1;
> +	}
> +
> +	dbus_message->padding_end = (8 - (cur - message) % 8) % 8;
> +
> +	/* Jump to body D-Bus alignment craziness */
> +	if ((cur - message) % 8 != 0)
> +		cur += 8 - (cur - message) % 8;
> +	dbus_message->new_header_offset = cur - message;
> +
> +	if (dbus_message->new_header_offset
> +	    + dbus_message->body_length != len) {
> +		pr_warn("Message truncated? " \
> +			"Header %d + Body %d != Length %zd\n",
> +			dbus_message->new_header_offset,
> +			dbus_message->body_length, len);
> +		return -EINVAL;
> +	}
> +
> +	if (dbus_message->body_signature &&
> +	    dbus_message->body_signature[0] == 's') {
> +		int str_len;
> +		str_len = *(u32 *)cur;
> +		cur += 4;
> +		dbus_message->arg0 = cur;
> +		cur += str_len + 1;
> +	}
> +
> +	if ((cur - message) % 4 != 0)
> +		cur += 4 - (cur - message) % 4;
> +
> +	if (dbus_message->body_signature &&
> +	    dbus_message->body_signature[0] == 's' &&
> +	    dbus_message->body_signature[1] == 's') {
> +		int str_len;
> +		str_len = *(u32 *)cur;
> +		cur += 4;
> +		dbus_message->arg1 = cur;
> +		cur += str_len + 1;
> +	}
> +
> +	if ((cur - message) % 4 != 0)
> +		cur += 4 - (cur - message) % 4;
> +
> +	if (dbus_message->body_signature &&
> +	    dbus_message->body_signature[0] == 's' &&
> +	    dbus_message->body_signature[1] == 's' &&
> +	    dbus_message->body_signature[2] == 's') {
> +		int str_len;
> +		str_len = *(u32 *)cur;
> +		cur += 4;
> +		dbus_message->arg2 = cur;
> +		cur += str_len + 1;
> +	}
> +
> +	if ((cur - message) % 4 != 0)
> +		cur += 4 - (cur - message) % 4;
> +
> +	if (dbus_message->type == DBUS_MESSAGE_TYPE_SIGNAL &&
> +	    dbus_message->sender && dbus_message->path &&
> +	    dbus_message->interface && dbus_message->member &&
> +	    dbus_message->arg0 &&
> +	    strcmp(dbus_message->sender, "org.freedesktop.DBus") == 0 &&
> +	    strcmp(dbus_message->interface, "org.freedesktop.DBus") == 0 &&
> +	    strcmp(dbus_message->path, "/org/freedesktop/DBus") == 0) {
> +		if (strcmp(dbus_message->member, "NameAcquired") == 0)
> +			dbus_message->name_acquired = dbus_message->arg0;
> +		else if (strcmp(dbus_message->member, "NameLost") == 0)
> +			dbus_message->name_lost = dbus_message->arg0;
> +	}
> +
> +	return 0;
> +}
> diff --git a/net/netfilter/nfdbus/message.h b/net/netfilter/nfdbus/message.h
> new file mode 100644
> index 0000000..e3ea4d3
> --- /dev/null
> +++ b/net/netfilter/nfdbus/message.h
> @@ -0,0 +1,71 @@
> +/*
> + * message.h  Basic D-Bus message parsing
> + *
> + * Copyright (C) 2010  Collabora Ltd
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + *
> + */
> +
> +#ifndef DBUS_MESSAGE_H
> +#define DBUS_MESSAGE_H
> +
> +#include <linux/list.h>
> +
> +#define DBUS_MAXIMUM_MATCH_RULE_LENGTH 1024
> +
> +/* Types of message */
> +
> +#define DBUS_MESSAGE_TYPE_INVALID       0
> +#define DBUS_MESSAGE_TYPE_METHOD_CALL   1
> +#define DBUS_MESSAGE_TYPE_METHOD_RETURN 2
> +#define DBUS_MESSAGE_TYPE_ERROR         3
> +#define DBUS_MESSAGE_TYPE_SIGNAL        4
> +#define DBUS_NUM_MESSAGE_TYPES          5
> +
> +/* No need to implement a feature-complete parser. It only implement what is
> + * needed by the bus. */
> +struct dbus_message {
> +	char *message;
> +	size_t len;
> +	size_t new_len;
> +
> +	/* direct pointers to the fields */
> +	int type;
> +	char *path;
> +	char *interface;
> +	char *member;
> +	char *destination;
> +	char *sender;
> +	char *body_signature;
> +	int body_length;
> +	char *arg0;
> +	char *arg1;
> +	char *arg2;
> +	char *name_acquired;
> +	char *name_lost;
> +
> +	/* How to add the 'sender' field in the headers */
> +	int new_header_offset;
> +	int len_offset;
> +	int padding_end;
> +};
> +
> +int dbus_message_type_from_string(const char *type_str);
> +
> +int dbus_message_parse(unsigned char *message, size_t len,
> +		       struct dbus_message *dbus_message);
> +
> +#endif /* DBUS_MESSAGE_H */
> -- 
> 1.7.10
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH net-next 15/15] netfilter: add netfilter D-Bus module
From: Vincent Sanders @ 2012-06-29 16:45 UTC (permalink / raw)
  To: netdev, linux-kernel, David S. Miller; +Cc: Alban Crequy
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

From: Alban Crequy <alban.crequy@collabora.co.uk>

AF_BUS has netfilter hooks on the packet sending path. This allows the
netfilter subsystem to register netfilter hook handlers.

The netfilter_dbus module allows inspecting D-Bus messages and taking
actions based on the information contained in these messages.

Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
---
 net/netfilter/Kconfig         |    2 +
 net/netfilter/Makefile        |    3 +
 net/netfilter/nfdbus/Kconfig  |   12 ++
 net/netfilter/nfdbus/Makefile |    6 +
 net/netfilter/nfdbus/nfdbus.c |  456 +++++++++++++++++++++++++++++++++++++++++
 net/netfilter/nfdbus/nfdbus.h |   44 ++++
 6 files changed, 523 insertions(+)
 create mode 100644 net/netfilter/nfdbus/Kconfig
 create mode 100644 net/netfilter/nfdbus/Makefile
 create mode 100644 net/netfilter/nfdbus/nfdbus.c
 create mode 100644 net/netfilter/nfdbus/nfdbus.h

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c19b214..a105d9b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1187,3 +1187,5 @@ endmenu
 source "net/netfilter/ipset/Kconfig"
 
 source "net/netfilter/ipvs/Kconfig"
+
+source "net/netfilter/nfdbus/Kconfig"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1c5160f..6dd4ade 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -123,3 +123,6 @@ obj-$(CONFIG_IP_SET) += ipset/
 
 # IPVS
 obj-$(CONFIG_IP_VS) += ipvs/
+
+# Dbus
+obj-$(CONFIG_NETFILTER_DBUS) += nfdbus/
diff --git a/net/netfilter/nfdbus/Kconfig b/net/netfilter/nfdbus/Kconfig
new file mode 100644
index 0000000..25699a1
--- /dev/null
+++ b/net/netfilter/nfdbus/Kconfig
@@ -0,0 +1,12 @@
+#
+# Netfilter D-Bus module configuration
+#
+config NETFILTER_DBUS
+	tristate "Netfilter D-bus (EXPERIMENTAL)"
+	depends on AF_BUS && CONNECTOR && EXPERIMENTAL
+	---help---
+	  If you say Y here, you will include support for a netfilter hook to
+	  parse D-Bus messages sent using the AF_BUS socket address family.
+
+	  To compile this as a module, choose M here: the module will be
+	  called netfilter_dbus.
diff --git a/net/netfilter/nfdbus/Makefile b/net/netfilter/nfdbus/Makefile
new file mode 100644
index 0000000..1a825f8
--- /dev/null
+++ b/net/netfilter/nfdbus/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the netfilter D-Bus module
+#
+obj-$(CONFIG_NETFILTER_DBUS) += netfilter_dbus.o
+
+netfilter_dbus-y := nfdbus.o message.o matchrule.o
diff --git a/net/netfilter/nfdbus/nfdbus.c b/net/netfilter/nfdbus/nfdbus.c
new file mode 100644
index 0000000..f6642e2
--- /dev/null
+++ b/net/netfilter/nfdbus/nfdbus.c
@@ -0,0 +1,456 @@
+/*
+ *  nfdbus.c - Netfilter module for AF_BUS/BUS_PROTO_DBUS.
+ */
+
+#define DRIVER_AUTHOR "Alban Crequy"
+#define DRIVER_DESC   "Netfilter module for AF_BUS/BUS_PROTO_DBUS."
+
+#include "nfdbus.h"
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/connector.h>
+#include <net/af_bus.h>
+
+#include "message.h"
+#include "matchrule.h"
+
+static struct nf_hook_ops nfho_dbus;
+
+static struct cb_id cn_cmd_id = { CN_IDX_NFDBUS, CN_VAL_NFDBUS };
+
+static unsigned int hash;
+
+/* Scoped by AF_BUS address */
+struct hlist_head matchrules_table[BUS_HASH_SIZE];
+DEFINE_SPINLOCK(matchrules_lock);
+
+static struct bus_match_maker *find_match_maker(struct sockaddr_bus *addr,
+		bool create, bool delete)
+{
+	u64 hash;
+	struct hlist_node *node;
+	struct bus_match_maker *matchmaker;
+	int path_len = strlen(addr->sbus_path);
+
+	hash = csum_partial(addr->sbus_path,
+			    strlen(addr->sbus_path), 0);
+	hash ^= addr->sbus_addr.s_addr;
+	hash ^= hash >> 32;
+	hash ^= hash >> 16;
+	hash ^= hash >> 8;
+	hash &= 0xff;
+
+	spin_lock(&matchrules_lock);
+	hlist_for_each_entry(matchmaker, node, &matchrules_table[hash],
+			     table_node) {
+		if (addr->sbus_family == matchmaker->addr.sbus_family &&
+		    addr->sbus_addr.s_addr == matchmaker->addr.sbus_addr.s_addr &&
+		    !memcmp(addr->sbus_path, matchmaker->addr.sbus_path,
+			   path_len)) {
+			kref_get(&matchmaker->kref);
+			if (delete)
+				hlist_del(&matchmaker->table_node);
+			spin_unlock(&matchrules_lock);
+			pr_debug("Found matchmaker for hash %llu", hash);
+			return matchmaker;
+		}
+	}
+	spin_unlock(&matchrules_lock);
+
+	if (!create) {
+		pr_debug("Matchmaker for hash %llu not found", hash);
+		return NULL;
+	}
+
+	matchmaker = bus_matchmaker_new(GFP_ATOMIC);
+	matchmaker->addr.sbus_family = addr->sbus_family;
+	matchmaker->addr.sbus_addr.s_addr = addr->sbus_addr.s_addr;
+	memcpy(matchmaker->addr.sbus_path, addr->sbus_path, BUS_PATH_MAX);
+
+	pr_debug("Create new matchmaker for hash %llu\n", hash);
+	spin_lock(&matchrules_lock);
+	hlist_add_head(&matchmaker->table_node, &matchrules_table[hash]);
+	kref_get(&matchmaker->kref);
+	spin_unlock(&matchrules_lock);
+	return matchmaker;
+}
+
+static unsigned int dbus_filter(unsigned int hooknum,
+				struct sk_buff *skb,
+				const struct net_device *in,
+				const struct net_device *out,
+				int (*okfn)(struct sk_buff *))
+{
+	struct bus_send_context	*sendctx;
+	struct bus_match_maker *matchmaker = NULL;
+	struct bus_match_maker *sender = NULL;
+	struct dbus_message msg = {0,};
+	unsigned char *data;
+	size_t len;
+	int err;
+	int ret;
+
+	if (!skb->sk || skb->sk->sk_family != PF_BUS) {
+		WARN(1, "netfilter_dbus received an invalid skb");
+		return NF_DROP;
+	}
+
+	data = skb->data;
+	sendctx = BUSCB(skb).sendctx;
+	if (!sendctx || !sendctx->sender || !sendctx->sender_socket) {
+		WARN(1, "netfilter_dbus received an AF_BUS packet" \
+		     " without context. This is a bug. Dropping the"
+			" packet.");
+		return NF_DROP;
+	}
+
+	if (sendctx->sender_socket->sk->sk_protocol != BUS_PROTO_DBUS) {
+		/* This kernel module is for D-Bus. It must not
+		 * interfere with other users of AF_BUS. */
+		return NF_ACCEPT;
+	}
+	if (sendctx->recipient)
+		matchmaker = find_match_maker(sendctx->recipient, false, false);
+
+	len =  skb_tail_pointer(skb) - data;
+
+	if (sendctx->to_master && sendctx->main_recipient) {
+		pr_debug("AF_BUS packet to the bus master. ACCEPT.\n");
+		ret = NF_ACCEPT;
+		goto out;
+	}
+
+	if (sendctx->main_recipient && !sendctx->bus_master_side) {
+		pr_debug("AF_BUS packet from a peer to a peer (unicast). ACCEPT.\n");
+		ret = NF_ACCEPT;
+		goto out;
+	}
+
+	err = dbus_message_parse(data, len, &msg);
+	if (err) {
+		if (!sendctx->main_recipient) {
+			pr_debug("AF_BUS packet for an eavesdropper or " \
+				 "multicast is not parsable. DROP.\n");
+			ret = NF_DROP;
+			goto out;
+		} else if (sendctx->bus_master_side) {
+			pr_debug("AF_BUS packet from bus master is not parsable. ACCEPT.\n");
+			ret = NF_ACCEPT;
+			goto out;
+		} else {
+			pr_debug("AF_BUS packet from peer is not parsable. DROP.\n");
+			ret = NF_DROP;
+			goto out;
+		}
+	}
+
+	if (sendctx->bus_master_side && !sendctx->main_recipient) {
+		pr_debug("AF_BUS packet '%s' from the bus master is for an " \
+			 "eavesdropper. DROP.\n",
+		       msg.member ? msg.member : "");
+		ret = NF_DROP;
+		goto out;
+	}
+	if (sendctx->bus_master_side) {
+		if (msg.name_acquired) {
+			pr_debug("New name: %s [%p %p].\n",
+				 msg.name_acquired, sendctx->sender,
+				 sendctx->recipient);
+
+			sender = find_match_maker(sendctx->sender, true, false);
+			bus_matchmaker_add_name(sender, msg.name_acquired,
+						GFP_ATOMIC);
+		}
+		if (msg.name_lost) {
+			pr_debug("Lost name: %s [%p %p].\n",
+				 msg.name_lost, sendctx->sender,
+				 sendctx->recipient);
+
+			sender = find_match_maker(sendctx->sender, true, false);
+			bus_matchmaker_remove_name(sender, msg.name_acquired);
+		}
+
+		pr_debug("AF_BUS packet '%s' from the bus master. ACCEPT.\n",
+			 msg.member ? msg.member : "");
+		ret = NF_ACCEPT;
+		goto out;
+	}
+
+	pr_debug("Multicast AF_BUS packet, %ld bytes, " \
+		 "considering recipient %lld...\n", len,
+		 sendctx->recipient ? sendctx->recipient->sbus_addr.s_addr : 0);
+
+	pr_debug("Message type %d %s->%s [iface: %s][member: %s][matchmaker=%p]...\n",
+		 msg.type,
+		 msg.sender ? msg.sender : "",
+		 msg.destination ? msg.destination : "",
+		 msg.interface ? msg.interface : "",
+		 msg.member ? msg.member : "",
+		 matchmaker);
+
+	if (!matchmaker) {
+		pr_debug("No match rules for this recipient. DROP.\n");
+		ret = NF_DROP;
+		goto out;
+	}
+
+	sender = find_match_maker(sendctx->sender, true, false);
+	err = bus_matchmaker_filter(matchmaker, sender, sendctx->eavesdropper,
+				    &msg);
+	if (err) {
+		pr_debug("Matchmaker: ACCEPT.\n");
+		ret = NF_ACCEPT;
+		goto out;
+	} else {
+		pr_debug("Matchmaker: DROP.\n");
+		ret = NF_DROP;
+		goto out;
+	}
+
+out:
+	if (matchmaker)
+		kref_put(&matchmaker->kref, bus_matchmaker_free);
+	if (sender)
+		kref_put(&sender->kref, bus_matchmaker_free);
+	return ret;
+}
+
+/* Taken from drbd_nl_send_reply() */
+static void nfdbus_nl_send_reply(struct cn_msg *msg, int ret_code)
+{
+	char buffer[sizeof(struct cn_msg)+sizeof(struct nfdbus_nl_cfg_reply)];
+	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+	struct nfdbus_nl_cfg_reply *reply =
+		(struct nfdbus_nl_cfg_reply *)cn_reply->data;
+	int rr;
+
+	memset(buffer, 0, sizeof(buffer));
+	cn_reply->id = msg->id;
+
+	cn_reply->seq = msg->seq;
+	cn_reply->ack = msg->ack  + 1;
+	cn_reply->len = sizeof(struct nfdbus_nl_cfg_reply);
+	cn_reply->flags = 0;
+
+	reply->ret_code = ret_code;
+
+	rr = cn_netlink_send(cn_reply, 0, GFP_NOIO);
+	if (rr && rr != -ESRCH)
+		pr_debug("nfdbus: cn_netlink_send()=%d\n", rr);
+}
+
+/**
+ * nfdbus_check_perm - check if a pid is allowed to update match rules
+ * @sockaddr_bus: the socket address of the bus
+ * @pid: the process id that wants to update the match rules set
+ *
+ * Test if a given process id is allowed to update the match rules set
+ * for this bus. Only the process that owns the bus master listen socket
+ * is allowed to update the match rules set for the bus.
+ */
+static bool nfdbus_check_perm(struct sockaddr_bus *sbusname, pid_t pid)
+{
+	struct net *net = get_net_ns_by_pid(pid);
+	struct sock *s;
+	struct bus_address *addr;
+	struct hlist_node *node;
+	int offset = (sbusname->sbus_path[0] == '\0');
+	int path_len = strnlen(sbusname->sbus_path + offset, BUS_PATH_MAX);
+	int len;
+	if (!net)
+		return false;
+
+	len = path_len + 1 + sizeof(__kernel_sa_family_t) +
+	      sizeof(struct bus_addr);
+
+	spin_lock(&bus_address_lock);
+
+	hlist_for_each_entry(addr, node, &bus_address_table[hash],
+			     table_node) {
+		s = addr->sock;
+
+		if (s->sk_protocol != BUS_PROTO_DBUS)
+			continue;
+
+		if (!net_eq(sock_net(s), net))
+			continue;
+
+		if (addr->len == len &&
+		    addr->name->sbus_family == sbusname->sbus_family &&
+		    addr->name->sbus_addr.s_addr == BUS_MASTER_ADDR &&
+		    bus_same_bus(addr->name, sbusname) &&
+		    pid_nr(s->sk_peer_pid) == pid) {
+			spin_unlock(&bus_address_lock);
+			return true;
+		}
+	}
+
+	spin_unlock(&bus_address_lock);
+
+	return false;
+}
+
+static void cn_cmd_cb(struct cn_msg *msg, struct netlink_skb_parms *nsp)
+{
+	struct nfdbus_nl_cfg_req *nlp = (struct nfdbus_nl_cfg_req *)msg->data;
+	struct cn_msg *cn_reply;
+	struct nfdbus_nl_cfg_reply *reply;
+	int retcode, rr;
+	pid_t pid = task_tgid_vnr(current);
+	int reply_size = sizeof(struct cn_msg)
+		+ sizeof(struct nfdbus_nl_cfg_reply);
+
+	pr_debug("nfdbus: %s nsp->pid=%d pid=%d\n", __func__, nsp->pid, pid);
+
+	if (!nfdbus_check_perm(&nlp->addr, pid)) {
+		pr_debug(KERN_ERR "nfdbus: pid=%d is not allowed!\n", pid);
+		retcode = EPERM;
+		goto fail;
+	}
+
+	cn_reply = kzalloc(reply_size, GFP_KERNEL);
+	if (!cn_reply) {
+		retcode = ENOMEM;
+		goto fail;
+	}
+	reply = (struct nfdbus_nl_cfg_reply *) cn_reply->data;
+
+	if (msg->len < sizeof(struct nfdbus_nl_cfg_req)) {
+		reply->ret_code = EINVAL;
+	} else if (nlp->cmd == NFDBUS_CMD_ADDMATCH) {
+		struct bus_match_rule *rule;
+		struct bus_match_maker *matchmaker;
+		reply->ret_code = 0;
+
+		if (msg->len == 0)
+			reply->ret_code = EINVAL;
+
+		rule = bus_match_rule_parse(nlp->data, GFP_ATOMIC);
+		if (rule) {
+			matchmaker = find_match_maker(&nlp->addr, true, false);
+			pr_debug("Add match rule for matchmaker %p\n",
+				 matchmaker);
+			bus_matchmaker_add_rule(matchmaker, rule);
+			kref_put(&matchmaker->kref, bus_matchmaker_free);
+		} else {
+			reply->ret_code = EINVAL;
+		}
+	} else if (nlp->cmd == NFDBUS_CMD_REMOVEMATCH) {
+		struct bus_match_rule *rule;
+		struct bus_match_maker *matchmaker;
+
+		rule = bus_match_rule_parse(nlp->data, GFP_ATOMIC);
+		matchmaker = find_match_maker(&nlp->addr, false, false);
+		if (!matchmaker) {
+			reply->ret_code = EINVAL;
+		} else {
+			pr_debug("Remove match rule for matchmaker %p\n",
+				 matchmaker);
+			bus_matchmaker_remove_rule_by_value(matchmaker, rule);
+			kref_put(&matchmaker->kref, bus_matchmaker_free);
+			reply->ret_code = 0;
+		}
+		bus_match_rule_free(rule);
+
+	} else if (nlp->cmd == NFDBUS_CMD_REMOVEALLMATCH) {
+		struct bus_match_maker *matchmaker;
+
+		matchmaker = find_match_maker(&nlp->addr, false, true);
+		if (!matchmaker) {
+			reply->ret_code = EINVAL;
+		} else {
+			pr_debug("Remove matchmaker %p\n", matchmaker);
+			kref_put(&matchmaker->kref, bus_matchmaker_free);
+			kref_put(&matchmaker->kref, bus_matchmaker_free);
+			reply->ret_code = 0;
+		}
+
+	} else {
+		reply->ret_code = EINVAL;
+	}
+
+	cn_reply->id = msg->id;
+	cn_reply->seq = msg->seq;
+	cn_reply->ack = msg->ack  + 1;
+	cn_reply->len = sizeof(struct nfdbus_nl_cfg_reply);
+	cn_reply->flags = 0;
+
+	rr = cn_netlink_reply(cn_reply, nsp->pid, GFP_KERNEL);
+	if (rr && rr != -ESRCH)
+		pr_debug("nfdbus: cn_netlink_send()=%d\n", rr);
+	pr_debug("nfdbus: cn_netlink_reply(pid=%d)=%d\n", nsp->pid, rr);
+
+	kfree(cn_reply);
+	return;
+fail:
+	nfdbus_nl_send_reply(msg, retcode);
+}
+
+static int __init nfdbus_init(void)
+{
+	int err;
+	struct bus_addr master_addr;
+
+	master_addr.s_addr = BUS_MASTER_ADDR;
+	hash = bus_compute_hash(master_addr);
+
+	pr_debug("Loading netfilter_dbus\n");
+
+	/* Install D-Bus netfilter hook */
+	nfho_dbus.hook     = dbus_filter;
+	nfho_dbus.hooknum  = NF_BUS_SENDING;
+	nfho_dbus.pf       = NFPROTO_BUS; /* Do not use PF_BUS, you fool! */
+	nfho_dbus.priority = 0;
+	nfho_dbus.owner = THIS_MODULE;
+	err = nf_register_hook(&nfho_dbus);
+	if (err)
+		return err;
+	pr_debug("Netfilter hook for D-Bus: installed.\n");
+
+	/* Install connector hook */
+	err = cn_add_callback(&cn_cmd_id, "nfdbus", cn_cmd_cb);
+	if (err)
+		goto err_cn_cmd_out;
+	pr_debug("Connector hook: installed.\n");
+
+	return 0;
+
+err_cn_cmd_out:
+	nf_unregister_hook(&nfho_dbus);
+
+	return err;
+}
+
+static void __exit nfdbus_cleanup(void)
+{
+	int i;
+	struct hlist_node *node, *tmp;
+	struct bus_match_maker *matchmaker;
+	nf_unregister_hook(&nfho_dbus);
+
+	cn_del_callback(&cn_cmd_id);
+
+	spin_lock(&matchrules_lock);
+	for (i = 0; i < BUS_HASH_SIZE; i++) {
+		hlist_for_each_entry_safe(matchmaker, node, tmp,
+					  &matchrules_table[i], table_node) {
+			hlist_del(&matchmaker->table_node);
+			kref_put(&matchmaker->kref, bus_matchmaker_free);
+		}
+	}
+	spin_unlock(&matchrules_lock);
+
+	pr_debug("Unloading netfilter_dbus\n");
+}
+
+module_init(nfdbus_init);
+module_exit(nfdbus_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_ALIAS_NET_PF_PROTO(PF_BUS, BUS_PROTO_DBUS);
diff --git a/net/netfilter/nfdbus/nfdbus.h b/net/netfilter/nfdbus/nfdbus.h
new file mode 100644
index 0000000..477bde3
--- /dev/null
+++ b/net/netfilter/nfdbus/nfdbus.h
@@ -0,0 +1,44 @@
+/*
+ * nfdbus.h  Netfilter module for AF_BUS/BUS_PROTO_DBUS.
+ *
+ * Copyright (C) 2012  Collabora Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef NETFILTER_DBUS_H
+#define NETFILTER_DBUS_H
+
+#include <linux/types.h>
+#include <linux/bus.h>
+
+#define NFDBUS_CMD_ADDMATCH        0x01
+#define NFDBUS_CMD_REMOVEMATCH     0x02
+#define NFDBUS_CMD_REMOVEALLMATCH  0x03
+
+struct nfdbus_nl_cfg_req {
+	__u32 cmd;
+	__u32 len;
+	struct sockaddr_bus addr;
+	__u64 pad;
+	unsigned char data[0];
+};
+
+struct nfdbus_nl_cfg_reply {
+	__u32 ret_code;
+};
+
+#endif /* NETFILTER_DBUS_H */
-- 
1.7.10

^ permalink raw reply related

* [PATCH net-next 14/15] netfilter: nfdbus: Add D-bus match rule implementation
From: Vincent Sanders @ 2012-06-29 16:45 UTC (permalink / raw)
  To: netdev, linux-kernel, David S. Miller
  Cc: Javier Martinez Canillas, Alban Crequy
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>

The D-Bus netfilter module needs to decode D-Bus match rules to decide
whether a given peer can receive a D-Bus message. Add a match rule
implementation to be used by the netfilter D-Bus module.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
---
 net/netfilter/nfdbus/matchrule.c | 1132 ++++++++++++++++++++++++++++++++++++++
 net/netfilter/nfdbus/matchrule.h |   82 +++
 2 files changed, 1214 insertions(+)
 create mode 100644 net/netfilter/nfdbus/matchrule.c
 create mode 100644 net/netfilter/nfdbus/matchrule.h

diff --git a/net/netfilter/nfdbus/matchrule.c b/net/netfilter/nfdbus/matchrule.c
new file mode 100644
index 0000000..4106bd5
--- /dev/null
+++ b/net/netfilter/nfdbus/matchrule.c
@@ -0,0 +1,1132 @@
+/*
+ * matchrule.c  D-Bus match rule implementation
+ *
+ * Based on signals.c from dbus
+ *
+ * Copyright (C) 2010  Collabora, Ltd.
+ * Copyright (C) 2003, 2005  Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "matchrule.h"
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "message.h"
+
+/*
+ * Bits stored in bus_match_rule.flags. Each bit records that the rule
+ * specifies the corresponding criterion; match_rule_equal() and
+ * match_rule_matches() only look at a field when its bit is set.
+ */
+enum bus_match_flags {
+	BUS_MATCH_MESSAGE_TYPE            = 1 << 0,
+	BUS_MATCH_INTERFACE               = 1 << 1,
+	BUS_MATCH_MEMBER                  = 1 << 2,
+	BUS_MATCH_SENDER                  = 1 << 3,
+	BUS_MATCH_DESTINATION             = 1 << 4,
+	BUS_MATCH_PATH                    = 1 << 5,
+	BUS_MATCH_ARGS                    = 1 << 6,
+	BUS_MATCH_PATH_NAMESPACE          = 1 << 7,
+	BUS_MATCH_CLIENT_IS_EAVESDROPPING = 1 << 8
+};
+
+/*
+ * One parsed D-Bus match rule. All strings are owned by the rule and are
+ * released by bus_match_rule_free().
+ */
+struct bus_match_rule {
+	/* Original rule text; for debugging only */
+	char *rule_text;
+
+	unsigned int flags; /* bitmask of enum bus_match_flags */
+
+	int   message_type;
+	char *interface;
+	char *member;
+	char *sender;
+	char *destination;
+	char *path;
+
+	/* arg_lens[i] may carry BUS_MATCH_ARG_IS_PATH in addition to the
+	 * length of args[i]; args[] may contain NULL entries.
+	 */
+	unsigned int *arg_lens;
+	char **args;
+	int args_len;
+
+	/* bus_match_rule is attached to rule_pool, either in a simple
+	 * doubly-linked list if the rule does not have any interface, or in a
+	 * red-black tree sorted by interface. If several rules have the
+	 * same interface, the first one is attached with struct rb_node and the
+	 * next ones are in the list
+	 */
+
+	struct rb_node node;
+	/* Doubly-linked non-circular list. If the rule has an interface, it is
+	 * in the rb tree and the single head is right here. Otherwise, the
+	 * single head is in rule_pool->rules_without_iface. With this data
+	 * structure, we don't need any allocation to insert or remove the rule.
+	 */
+	struct hlist_head first;
+	struct hlist_node list;
+
+	/* used by bus_matchmaker_free() to queue the rules of the tree
+	 * while deleting all of them
+	 */
+	struct list_head del_list;
+};
+
+/*
+ * A name registered on a bus_match_maker with bus_matchmaker_add_name().
+ * Entries live in bus_match_maker.names, an rb tree ordered by strcmp()
+ * on the name.
+ */
+struct dbus_name {
+	struct rb_node node;
+	char *name;	/* kstrdup()'ed copy owned by this entry */
+
+	/* used to delete all names from the tree */
+	struct list_head del_list;
+};
+
+#define BUS_MATCH_ARG_IS_PATH  0x8000000u
+
+#define DBUS_STRING_MAX_LENGTH 1024
+
+/** Max length of a match rule string; to keep people from hosing the
+ * daemon with some huge rule
+ */
+#define DBUS_MAXIMUM_MATCH_RULE_LENGTH 1024
+
+/*
+ * Allocate a zero-filled match rule.
+ *
+ * Returns the new rule, or NULL if the allocation failed.
+ */
+struct bus_match_rule *bus_match_rule_new(gfp_t gfp_flags)
+{
+	return kzalloc(sizeof(struct bus_match_rule), gfp_flags);
+}
+
+/*
+ * Release a match rule together with every string it owns.
+ *
+ * A NULL rule is ignored, like kfree(NULL). The rule is not unlinked from
+ * any list or tree here; the caller has to do that first.
+ */
+void bus_match_rule_free(struct bus_match_rule *rule)
+{
+	int i;
+
+	if (!rule)
+		return;
+
+	kfree(rule->rule_text);
+	kfree(rule->interface);
+	kfree(rule->member);
+	kfree(rule->sender);
+	kfree(rule->destination);
+	kfree(rule->path);
+	kfree(rule->arg_lens);
+
+	/* args[] may contain NULL entries, so walk all args_len slots
+	 * instead of stopping at the first NULL
+	 */
+	if (rule->args) {
+		for (i = 0; i < rule->args_len; i++)
+			kfree(rule->args[i]);
+
+		kfree(rule->args);
+	}
+
+	kfree(rule);
+}
+
+/*
+ * Make the rule match on a message type. gfp_flags is unused.
+ * Cannot fail; always returns 1.
+ */
+static int
+bus_match_rule_set_message_type(struct bus_match_rule *rule,
+				int type,
+				gfp_t gfp_flags)
+{
+	rule->message_type = type;
+	rule->flags |= BUS_MATCH_MESSAGE_TYPE;
+
+	return 1;
+}
+
+/*
+ * Make the rule match on an interface. The string is copied; a previously
+ * set interface is released. Returns 1 on success, 0 if the copy could not
+ * be allocated (the rule is left unchanged in that case).
+ */
+static int
+bus_match_rule_set_interface(struct bus_match_rule *rule,
+			     const char *interface,
+			     gfp_t gfp_flags)
+{
+	char *copy;
+
+	WARN_ON(!interface);
+
+	copy = kstrdup(interface, gfp_flags);
+	if (!copy)
+		return 0;
+
+	kfree(rule->interface);
+	rule->interface = copy;
+	rule->flags |= BUS_MATCH_INTERFACE;
+
+	return 1;
+}
+
+/*
+ * Make the rule match on a member name. The string is copied; a previously
+ * set member is released. Returns 1 on success, 0 if the copy could not be
+ * allocated (the rule is left unchanged in that case).
+ */
+static int
+bus_match_rule_set_member(struct bus_match_rule *rule,
+			  const char *member,
+			  gfp_t gfp_flags)
+{
+	char *copy;
+
+	WARN_ON(!member);
+
+	copy = kstrdup(member, gfp_flags);
+	if (!copy)
+		return 0;
+
+	kfree(rule->member);
+	rule->member = copy;
+	rule->flags |= BUS_MATCH_MEMBER;
+
+	return 1;
+}
+
+/*
+ * Make the rule match on a sender name. The string is copied; a previously
+ * set sender is released. Returns 1 on success, 0 if the copy could not be
+ * allocated (the rule is left unchanged in that case).
+ */
+static int
+bus_match_rule_set_sender(struct bus_match_rule *rule,
+			  const char *sender,
+			  gfp_t gfp_flags)
+{
+	char *copy;
+
+	WARN_ON(!sender);
+
+	copy = kstrdup(sender, gfp_flags);
+	if (!copy)
+		return 0;
+
+	kfree(rule->sender);
+	rule->sender = copy;
+	rule->flags |= BUS_MATCH_SENDER;
+
+	return 1;
+}
+
+/*
+ * Make the rule match on a destination name. The string is copied; a
+ * previously set destination is released. Returns 1 on success, 0 if the
+ * copy could not be allocated (the rule is left unchanged in that case).
+ */
+static int
+bus_match_rule_set_destination(struct bus_match_rule *rule,
+			       const char   *destination,
+			       gfp_t gfp_flags)
+{
+	char *copy;
+
+	WARN_ON(!destination);
+
+	copy = kstrdup(destination, gfp_flags);
+	if (!copy)
+		return 0;
+
+	kfree(rule->destination);
+	rule->destination = copy;
+	rule->flags |= BUS_MATCH_DESTINATION;
+
+	return 1;
+}
+
+#define ISWHITE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || \
+		    ((c) == '\r'))
+
+/*
+ * Extract the next key of a match rule.
+ *
+ * Scanning starts at str[start]. Leading whitespace is skipped, the key
+ * runs up to the next '=' or whitespace character, and whitespace between
+ * the key and the '=' is skipped as well. The key text is appended to
+ * @key, which must be a NUL-terminated buffer of DBUS_STRING_MAX_LENGTH
+ * bytes.
+ *
+ * On success with a key, *value_pos is the offset just past the '='.
+ * If there is no key at this position, @key is left untouched and
+ * *value_pos is the offset at which scanning stopped.
+ *
+ * Returns 1 on success, 0 if the key is not followed by '=' or does not
+ * fit into @key (*value_pos is not written in that case).
+ */
+static int find_key(const char *str, int start, char *key, int *value_pos)
+{
+	const char *p = str + start;
+	const char *key_start;
+	size_t key_len;
+
+	while (*p && ISWHITE(*p))
+		++p;
+
+	key_start = p;
+
+	while (*p && *p != '=' && !ISWHITE(*p))
+		++p;
+
+	key_len = p - key_start;
+
+	while (*p && ISWHITE(*p))
+		++p;
+
+	if (key_len == 0) {
+		/* Empty match rules or trailing whitespace are OK */
+		*value_pos = p - str;
+		return 1;
+	}
+
+	if (*p != '=') {
+		pr_warn("Match rule has a key with no subsequent '=' character\n");
+		return 0;
+	}
+	++p;
+
+	/* strncat() only bounds the source, so check the destination here */
+	if (strlen(key) + key_len >= DBUS_STRING_MAX_LENGTH) {
+		pr_warn("Key in match rule is too long\n");
+		return 0;
+	}
+	strncat(key, key_start, key_len);
+
+	*value_pos = p - str;
+
+	return 1;
+}
+
+/*
+ * Append one character to the NUL-terminated string in @value, whose
+ * current length is *len. Returns 0 without writing if the
+ * DBUS_STRING_MAX_LENGTH byte buffer is full, 1 otherwise.
+ */
+static int value_append(char *value, size_t *len, char c)
+{
+	if (*len + 1 >= DBUS_STRING_MAX_LENGTH)
+		return 0;
+
+	value[(*len)++] = c;
+	value[*len] = '\0';
+
+	return 1;
+}
+
+/*
+ * Extract the value that follows a key in a match rule.
+ *
+ * Scanning starts at str[start] and stops after the first unquoted ','
+ * or at the end of the string. Text between single quotes is taken
+ * literally. Outside quotes, a backslash followed by a single quote
+ * yields a literal quote; any other backslash is kept as is.
+ *
+ * The decoded text is appended to @value, which must be a NUL-terminated
+ * buffer of DBUS_STRING_MAX_LENGTH bytes. @key is unused. On success
+ * *value_end is the offset at which scanning stopped (past the ',' if
+ * there was one).
+ *
+ * Returns 1 on success, 0 on unbalanced quotes or if the value does not
+ * fit into @value (*value_end is not written in that case).
+ */
+static int find_value(const char *str, int start, const char *key, char *value,
+		      int *value_end)
+{
+	const char *p;
+	size_t len = strlen(value);
+	/* '\0': plain text, '\'': inside quotes, '\\': just saw a backslash */
+	char quote_char = '\0';
+
+	for (p = str + start; *p; ++p) {
+		if (quote_char == '\'') {
+			if (*p == '\'')
+				quote_char = '\0';
+			else if (!value_append(value, &len, *p))
+				goto too_long;
+		} else if (quote_char == '\\') {
+			/*\ only counts as an escape if escaping a quote mark */
+			if (*p != '\'' && !value_append(value, &len, '\\'))
+				goto too_long;
+
+			if (!value_append(value, &len, *p))
+				goto too_long;
+
+			quote_char = '\0';
+		} else if (*p == ',') {
+			/* an unquoted comma ends the value; step past it */
+			++p;
+			break;
+		} else if (*p == '\'' || *p == '\\') {
+			quote_char = *p;
+		} else if (!value_append(value, &len, *p)) {
+			goto too_long;
+		}
+	}
+
+	if (quote_char == '\\') {
+		/* a trailing backslash is kept literally */
+		if (!value_append(value, &len, '\\'))
+			goto too_long;
+	} else if (quote_char == '\'') {
+		pr_warn("Unbalanced quotation marks in match rule\n");
+		return 0;
+	}
+
+	/* Zero-length values are allowed */
+
+	*value_end = p - str;
+
+	return 1;
+
+too_long:
+	pr_warn("Value in match rule is too long\n");
+	return 0;
+}
+
+/* duplicates aren't allowed so the real legitimate max is only 6 or
+ * so. Leaving extra so we don't have to bother to update it.
+ * FIXME this is sort of busted now with arg matching, but we let
+ * you match on up to 10 args for now
+ */
+#define MAX_RULE_TOKENS 16
+
+/* this is slightly too high level to be termed a "token"
+ * but let's not be pedantic.
+ */
+struct rule_token {
+	char *key;	/* allocated key text; NULL ends the token array */
+	char *value;	/* allocated, decoded value belonging to key */
+};
+
+/*
+ * Split a match rule into key/value tokens.
+ *
+ * At most MAX_RULE_TOKENS slots of @tokens are filled; anything beyond
+ * that in the rule text is ignored. @tokens must be zeroed by the caller.
+ * Every stored key and value is a separately allocated buffer that the
+ * caller has to kfree().
+ *
+ * Returns 1 on success. Returns 0 on a parse error or allocation failure;
+ * in that case every slot of @tokens has been released and reset to NULL.
+ */
+static int tokenize_rule(const char *rule_text,
+			 struct rule_token tokens[MAX_RULE_TOKENS],
+			 gfp_t gfp_flags)
+{
+	const int rule_len = strlen(rule_text);
+	char *key = NULL;
+	char *value = NULL;
+	int pos = 0;
+	int i;
+
+	for (i = 0; i < MAX_RULE_TOKENS && pos < rule_len; ++i) {
+		key = kzalloc(DBUS_STRING_MAX_LENGTH, gfp_flags);
+		value = kzalloc(DBUS_STRING_MAX_LENGTH, gfp_flags);
+		if (!key || !value) {
+			pr_err("Out of memory\n");
+			goto failed;
+		}
+
+		if (!find_key(rule_text, pos, key, &pos))
+			goto failed;
+
+		if (key[0] == '\0') {
+			/* No key at this position: the slot stays empty, so
+			 * the scratch buffers must be released here.
+			 */
+			kfree(key);
+			kfree(value);
+			key = NULL;
+			value = NULL;
+			continue;
+		}
+
+		if (!find_value(rule_text, pos, key, value, &pos))
+			goto failed;
+
+		/* ownership of both buffers moves to the token array */
+		tokens[i].key = key;
+		tokens[i].value = value;
+		key = NULL;
+		value = NULL;
+	}
+
+	return 1;
+
+failed:
+	/* buffers of the token that was being built, if any */
+	kfree(key);
+	kfree(value);
+
+	for (i = 0; i < MAX_RULE_TOKENS; i++) {
+		kfree(tokens[i].key);
+		kfree(tokens[i].value);
+		tokens[i].key = NULL;
+		tokens[i].value = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse the textual form of a match rule.
+ *
+ * The format is comma-separated with strings quoted with single quotes
+ * as for the shell (to escape a literal single quote, use '\'').
+ *
+ * type='signal',sender='org.freedesktop.DBus',interface='org.freedesktop.DBus',
+ * member='Foo', path='/bar/foo',destination=':452345.34'
+ *
+ * Keys handled here: type, sender, interface, member, destination and
+ * eavesdrop. Keys starting with "arg" are accepted but ignored. Any other
+ * key, and any key given twice (except eavesdrop), makes parsing fail.
+ * NOTE(review): "path" appears in the example above but no branch below
+ * handles it, so it is currently rejected as an unknown key - confirm
+ * whether that is intended.
+ *
+ * Returns a new rule that the caller releases with bus_match_rule_free(),
+ * or NULL on a malformed rule or allocation failure.
+ */
+struct bus_match_rule *bus_match_rule_parse(const char *rule_text,
+					    gfp_t gfp_flags)
+{
+	struct bus_match_rule *rule;
+	struct rule_token tokens[MAX_RULE_TOKENS+1]; /* NULL termination + 1 */
+	size_t rule_len;
+	int i;
+
+	rule_len = strlen(rule_text);
+	if (rule_len > DBUS_MAXIMUM_MATCH_RULE_LENGTH) {
+		/* %zu: a size_t is not a long on every architecture */
+		pr_warn("Match rule text is %zu bytes, maximum is %d\n",
+			rule_len, DBUS_MAXIMUM_MATCH_RULE_LENGTH);
+		return NULL;
+	}
+
+	memset(tokens, '\0', sizeof(tokens));
+
+	rule = bus_match_rule_new(gfp_flags);
+	if (rule == NULL) {
+		pr_err("Out of memory\n");
+		goto failed;
+	}
+
+	rule->rule_text = kstrdup(rule_text, gfp_flags);
+	if (rule->rule_text == NULL) {
+		pr_err("Out of memory\n");
+		goto failed;
+	}
+
+	if (!tokenize_rule(rule_text, tokens, gfp_flags))
+		goto failed;
+
+	i = 0;
+	while (tokens[i].key != NULL) {
+		const char *key = tokens[i].key;
+		const char *value = tokens[i].value;
+
+		if (strcmp(key, "type") == 0) {
+			int t;
+
+			if (rule->flags & BUS_MATCH_MESSAGE_TYPE) {
+				pr_warn("Key %s specified twice in match rule\n",
+					key);
+				goto failed;
+			}
+
+			t = dbus_message_type_from_string(value);
+
+			if (t == DBUS_MESSAGE_TYPE_INVALID) {
+				pr_warn("Invalid message type (%s) in match rule\n",
+					value);
+				goto failed;
+			}
+
+			if (!bus_match_rule_set_message_type(rule, t,
+							     gfp_flags)) {
+				pr_err("Out of memory\n");
+				goto failed;
+			}
+		} else if (strcmp(key, "sender") == 0) {
+			if (rule->flags & BUS_MATCH_SENDER) {
+				pr_warn("Key %s specified twice in match rule\n",
+					key);
+				goto failed;
+			}
+
+			if (!bus_match_rule_set_sender(rule, value,
+						       gfp_flags)) {
+				pr_err("Out of memory\n");
+				goto failed;
+			}
+		} else if (strcmp(key, "interface") == 0) {
+			if (rule->flags & BUS_MATCH_INTERFACE) {
+				pr_warn("Key %s specified twice in match rule\n",
+					key);
+				goto failed;
+			}
+
+			if (!bus_match_rule_set_interface(rule, value,
+							  gfp_flags)) {
+				pr_err("Out of memory\n");
+				goto failed;
+			}
+		} else if (strcmp(key, "member") == 0) {
+			if (rule->flags & BUS_MATCH_MEMBER) {
+				pr_warn("Key %s specified twice in match rule\n",
+					key);
+				goto failed;
+			}
+
+			if (!bus_match_rule_set_member(rule, value,
+						       gfp_flags)) {
+				pr_err("Out of memory\n");
+				goto failed;
+			}
+		} else if (strcmp(key, "destination") == 0) {
+			if (rule->flags & BUS_MATCH_DESTINATION) {
+				pr_warn("Key %s specified twice in match rule\n",
+					key);
+				goto failed;
+			}
+
+			if (!bus_match_rule_set_destination(rule, value,
+							    gfp_flags)) {
+				pr_err("Out of memory\n");
+				goto failed;
+			}
+		} else if (strcmp(key, "eavesdrop") == 0) {
+			if (strcmp(value, "true") == 0) {
+				rule->flags |= BUS_MATCH_CLIENT_IS_EAVESDROPPING;
+			} else if (strcmp(value, "false") == 0) {
+				rule->flags &= ~(BUS_MATCH_CLIENT_IS_EAVESDROPPING);
+			} else {
+				pr_warn("eavesdrop='%s' is invalid, "
+					"it should be 'true' or 'false'\n",
+					value);
+				goto failed;
+			}
+		} else if (strncmp(key, "arg", 3) != 0) {
+			/* "arg..." keys fall through and are ignored */
+			pr_warn("Unknown key \"%s\" in match rule\n",
+				   key);
+			goto failed;
+		}
+
+		++i;
+	}
+
+	goto out;
+
+failed:
+	if (rule) {
+		bus_match_rule_free(rule);
+		rule = NULL;
+	}
+
+out:
+	/* The setters copied what they needed; drop every token buffer.
+	 * kfree(NULL) is a no-op, so unused slots are harmless.
+	 */
+	for (i = 0; i < MAX_RULE_TOKENS; i++) {
+		kfree(tokens[i].key);
+		kfree(tokens[i].value);
+	}
+
+	return rule;
+}
+
+/*
+ * Look up the tree entry for an interface name.
+ *
+ * The rule returned is the one linked into the rb tree; its "first" hlist
+ * head anchors every rule with that interface. It is not necessarily the
+ * first rule on that list. Returns NULL if no rule has this interface.
+ */
+struct bus_match_rule *match_rule_search(struct rb_root *root,
+					 const char *interface)
+{
+	struct rb_node *pos = root->rb_node;
+
+	while (pos) {
+		struct bus_match_rule *entry =
+			rb_entry(pos, struct bus_match_rule, node);
+		int cmp = strcmp(interface, entry->interface);
+
+		if (cmp == 0)
+			return entry;
+
+		pos = (cmp < 0) ? pos->rb_left : pos->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Insert a rule that has an interface into the tree rooted at @root.
+ *
+ * If the tree already holds a rule with the same interface, the new rule
+ * is only put at the front of that rule's hlist. Otherwise the new rule
+ * becomes a tree node itself and the sole member of its own hlist.
+ */
+void match_rule_insert(struct rb_root *root, struct bus_match_rule *data)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	/* data's own list head starts out empty on both paths below */
+	INIT_HLIST_HEAD(&data->first);
+
+	/* Figure out where to put the new node */
+	while (*link) {
+		struct bus_match_rule *this =
+			rb_entry(*link, struct bus_match_rule, node);
+		int cmp = strcmp(data->interface, this->interface);
+
+		if (cmp == 0) {
+			/* same interface: chain onto the existing tree node */
+			hlist_add_head(&data->list, &this->first);
+			return;
+		}
+
+		parent = *link;
+		link = (cmp < 0) ? &(*link)->rb_left : &(*link)->rb_right;
+	}
+
+	/* this rule is single in its list */
+	hlist_add_head(&data->list, &data->first);
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, root);
+}
+
+/*
+ * Allocate a matchmaker with empty rule pools and a reference count
+ * initialised by kref_init().
+ *
+ * Returns the new matchmaker, or NULL if the allocation failed.
+ */
+struct bus_match_maker *bus_matchmaker_new(gfp_t gfp_flags)
+{
+	struct bus_match_maker *mm;
+	int type;
+
+	mm = kzalloc(sizeof(struct bus_match_maker), gfp_flags);
+	if (!mm)
+		return NULL;
+
+	type = DBUS_MESSAGE_TYPE_INVALID;
+	while (type < DBUS_NUM_MESSAGE_TYPES) {
+		mm->rules_by_type[type].rules_by_iface = RB_ROOT;
+		type++;
+	}
+
+	kref_init(&mm->kref);
+
+	return mm;
+}
+
+/*
+ * Destroy the matchmaker that embeds @kref, together with every name and
+ * every match rule it still holds.
+ *
+ * Both rb trees are torn down without rb_erase(): the nodes are first
+ * queued on a temporary list through their del_list member (the list
+ * grows while it is being walked, so all descendants end up on it) and
+ * are then freed one by one.
+ */
+void bus_matchmaker_free(struct kref *kref)
+{
+	struct bus_match_maker *matchmaker;
+	struct list_head del_list;
+	struct rb_node *n;
+	int i;
+
+	matchmaker = container_of(kref, struct bus_match_maker, kref);
+
+	/* free names */
+	INIT_LIST_HEAD(&del_list);
+	n = matchmaker->names.rb_node;
+	if (n) {
+		struct dbus_name *dbus_name, *cur, *tmp;
+
+		dbus_name = rb_entry(n, struct dbus_name, node);
+		list_add_tail(&dbus_name->del_list, &del_list);
+
+		/* entries appended here are visited later by this same loop */
+		list_for_each_entry(cur, &del_list, del_list) {
+			struct dbus_name *right, *left;
+			if (cur->node.rb_right) {
+				right = rb_entry(cur->node.rb_right,
+						 struct dbus_name, node);
+				list_add_tail(&right->del_list, &del_list);
+			}
+			if (cur->node.rb_left) {
+				left = rb_entry(cur->node.rb_left,
+						struct dbus_name, node);
+				list_add_tail(&left->del_list, &del_list);
+			}
+		}
+		list_for_each_entry_safe(dbus_name, tmp, &del_list, del_list) {
+			kfree(dbus_name->name);
+			list_del(&dbus_name->del_list);
+			kfree(dbus_name);
+		}
+	}
+	WARN_ON(!list_empty_careful(&del_list));
+
+	/* free match rules */
+	for (i = 0 ; i < DBUS_NUM_MESSAGE_TYPES ; i++) {
+		struct rule_pool *pool = matchmaker->rules_by_type + i;
+		struct bus_match_rule *match_rule, *cur, *tmp;
+		struct hlist_node *list_tmp, *list_tmp2;
+
+		/* free match rules from the list */
+		hlist_for_each_entry_safe(cur, list_tmp, list_tmp2,
+					  &pool->rules_without_iface, list) {
+			bus_match_rule_free(cur);
+		}
+
+		/* free match rules from the tree */
+		if (!pool->rules_by_iface.rb_node)
+			continue;
+		match_rule = rb_entry(pool->rules_by_iface.rb_node,
+				      struct bus_match_rule, node);
+		list_add_tail(&match_rule->del_list, &del_list);
+
+		/* queue every tree node; only tree nodes get onto del_list */
+		list_for_each_entry(cur, &del_list, del_list) {
+			struct bus_match_rule *right, *left;
+			if (cur->node.rb_right) {
+				right = rb_entry(cur->node.rb_right,
+						 struct bus_match_rule, node);
+				list_add_tail(&right->del_list, &del_list);
+			}
+			if (cur->node.rb_left) {
+				left = rb_entry(cur->node.rb_left,
+						struct bus_match_rule, node);
+				list_add_tail(&left->del_list, &del_list);
+			}
+		}
+		list_for_each_entry_safe(match_rule, tmp, &del_list, del_list) {
+			/* The tree node is a member of its own hlist, so it is
+			 * skipped in the inner loop and freed only after
+			 * the other rules sharing its interface are gone.
+			 */
+			hlist_for_each_entry_safe(cur, list_tmp, list_tmp2,
+						  &match_rule->first, list) {
+				if (cur != match_rule)
+					bus_match_rule_free(cur);
+			}
+			list_del(&match_rule->del_list);
+			bus_match_rule_free(match_rule);
+		}
+		WARN_ON(!list_empty_careful(&del_list));
+	}
+
+	kfree(matchmaker);
+}
+
+/*
+ * Attach a rule to the pool selected by its message type: to the
+ * per-interface tree when the rule names an interface, to the plain list
+ * otherwise. The rule can't be modified after it's added.
+ *
+ * Always returns 1.
+ */
+int bus_matchmaker_add_rule(struct bus_match_maker *matchmaker,
+			    struct bus_match_rule *rule)
+{
+	struct rule_pool *pool;
+
+	WARN_ON(rule->message_type < 0);
+	WARN_ON(rule->message_type >= DBUS_NUM_MESSAGE_TYPES);
+
+	pool = &matchmaker->rules_by_type[rule->message_type];
+
+	if (!rule->interface) {
+		hlist_add_head(&rule->list, &pool->rules_without_iface);
+		return 1;
+	}
+
+	match_rule_insert(&pool->rules_by_iface, rule);
+
+	return 1;
+}
+
+/*
+ * Compare two rules by value. Only the criteria whose flag bit is set are
+ * compared; the flag words themselves must be identical.
+ *
+ * Returns 1 if the rules are equal, 0 otherwise.
+ */
+static int match_rule_equal(struct bus_match_rule *a,
+			    struct bus_match_rule *b)
+{
+	const unsigned int flags = a->flags;
+	int i;
+
+	if (flags != b->flags)
+		return 0;
+
+	if ((flags & BUS_MATCH_MESSAGE_TYPE) &&
+	    a->message_type != b->message_type)
+		return 0;
+
+	if ((flags & BUS_MATCH_MEMBER) && strcmp(a->member, b->member) != 0)
+		return 0;
+
+	if ((flags & BUS_MATCH_PATH) && strcmp(a->path, b->path) != 0)
+		return 0;
+
+	if ((flags & BUS_MATCH_INTERFACE) &&
+	    strcmp(a->interface, b->interface) != 0)
+		return 0;
+
+	if ((flags & BUS_MATCH_SENDER) && strcmp(a->sender, b->sender) != 0)
+		return 0;
+
+	if ((flags & BUS_MATCH_DESTINATION) &&
+	    strcmp(a->destination, b->destination) != 0)
+		return 0;
+
+	if (!(flags & BUS_MATCH_ARGS))
+		return 1;
+
+	if (a->args_len != b->args_len)
+		return 0;
+
+	for (i = 0; i < a->args_len; i++) {
+		int length;
+
+		/* a slot must be used in both rules or in neither */
+		if (!a->args[i] != !b->args[i])
+			return 0;
+
+		if (a->arg_lens[i] != b->arg_lens[i])
+			return 0;
+
+		if (a->args[i] == NULL)
+			continue;
+
+		WARN_ON(!b->args[i]);
+
+		/* the stored length carries the "is path" marker bit */
+		length = a->arg_lens[i] & ~BUS_MATCH_ARG_IS_PATH;
+		if (memcmp(a->args[i], b->args[i], length) != 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Remove and free a single rule which is equal to the given rule by value.
+ * @rule itself is only used for comparison and is not freed. Nothing
+ * happens if no equal rule is registered.
+ *
+ * For rules with an interface, the rule linked into the rb tree also
+ * carries the hlist head for all rules sharing that interface. When that
+ * rule is the one being removed and others remain, one of the remaining
+ * rules takes over both the list head and the place in the tree, so the
+ * tree never points at freed memory.
+ */
+void bus_matchmaker_remove_rule_by_value(struct bus_match_maker *matchmaker,
+					 struct bus_match_rule *rule)
+{
+	struct rule_pool *pool;
+	struct hlist_head *head;
+	struct bus_match_rule *anchor = NULL;	/* tree node, if any */
+	struct bus_match_rule *cur_rule;
+	struct hlist_node *cur;
+
+	WARN_ON(rule->message_type < 0);
+	WARN_ON(rule->message_type >= DBUS_NUM_MESSAGE_TYPES);
+
+	pool = matchmaker->rules_by_type + rule->message_type;
+
+	if (rule->interface) {
+		anchor = match_rule_search(&pool->rules_by_iface,
+					   rule->interface);
+		if (!anchor)
+			return;	/* no rule registered for this interface */
+
+		head = &anchor->first;
+	} else {
+		head = &pool->rules_without_iface;
+	}
+
+	hlist_for_each_entry(cur_rule, cur, head, list) {
+		if (!match_rule_equal(cur_rule, rule))
+			continue;
+
+		hlist_del(cur);
+
+		if (anchor && hlist_empty(head)) {
+			/* last rule for this interface */
+			rb_erase(&anchor->node, &pool->rules_by_iface);
+		} else if (cur_rule == anchor) {
+			/* hand list head and tree position to a survivor */
+			struct bus_match_rule *heir =
+				hlist_entry(head->first,
+					    struct bus_match_rule, list);
+
+			hlist_move_list(&anchor->first, &heir->first);
+			rb_replace_node(&anchor->node, &heir->node,
+					&pool->rules_by_iface);
+		}
+
+		bus_match_rule_free(cur_rule);
+		break;
+	}
+}
+
+/*
+ * Tell whether @service_name is present in the name tree of @connection.
+ *
+ * Returns 1 if it is, 0 if it is not or if @service_name is NULL.
+ */
+static int connection_is_primary_owner(struct bus_match_maker *connection,
+				       const char *service_name)
+{
+	struct rb_node *pos = connection->names.rb_node;
+
+	if (service_name == NULL)
+		return 0;
+
+	while (pos) {
+		struct dbus_name *entry = rb_entry(pos, struct dbus_name, node);
+		int cmp = strcmp(service_name, entry->name);
+
+		if (cmp == 0)
+			return 1;
+
+		pos = (cmp < 0) ? pos->rb_left : pos->rb_right;
+	}
+
+	return 0;
+}
+
+/*
+ * Check one rule against one message.
+ *
+ * @matchmaker is the candidate recipient, @sender the sending peer (NULL
+ * is compared against "org.freedesktop.DBus"), @eavesdrop tells whether
+ * eavesdropping rules may be honoured. Message type and interface are not
+ * examined here. Only member, sender, destination and path are checked,
+ * each only when the rule's flag bit is set.
+ *
+ * Returns 1 if the rule matches the message, 0 otherwise.
+ */
+static int match_rule_matches(struct bus_match_maker *matchmaker,
+			      struct bus_match_maker *sender,
+			      int eavesdrop,
+			      struct bus_match_rule *rule,
+			      const struct dbus_message *message)
+{
+	/* Don't consider the rule if this is an eavesdropping match rule
+	 * and eavesdropping is not allowed on that peer */
+	if ((rule->flags & BUS_MATCH_CLIENT_IS_EAVESDROPPING) && !eavesdrop)
+		return 0;
+
+	/* Since D-Bus 1.5.6, match rules do not match messages which have a
+	 * DESTINATION field unless the match rule specifically requests this
+	 * by specifying eavesdrop='true' in the match rule. */
+	if (message->destination &&
+	    !(rule->flags & BUS_MATCH_CLIENT_IS_EAVESDROPPING))
+		return 0;
+
+	if (rule->flags & BUS_MATCH_MEMBER) {
+		const char *member;
+
+		WARN_ON(!rule->member);
+
+		member = message->member;
+		if (member == NULL)
+			return 0;
+
+		if (strcmp(member, rule->member) != 0)
+			return 0;
+	}
+
+	if (rule->flags & BUS_MATCH_SENDER) {
+		WARN_ON(!rule->sender);
+
+		/* no sender peer: only the bus' own name can match */
+		if (sender == NULL) {
+			if (strcmp(rule->sender,
+				   "org.freedesktop.DBus") != 0)
+				return 0;
+		} else
+			if (!connection_is_primary_owner(sender, rule->sender))
+				return 0;
+	}
+
+	if (rule->flags & BUS_MATCH_DESTINATION) {
+		const char *destination;
+
+		WARN_ON(!rule->destination);
+
+		destination = message->destination;
+		if (destination == NULL)
+			return 0;
+
+		/* This will not just work out of the box because this is
+		 * an eavesdropping match rule. */
+		if (matchmaker == NULL) {
+			if (strcmp(rule->destination,
+				   "org.freedesktop.DBus") != 0)
+				return 0;
+		} else
+			if (!connection_is_primary_owner(matchmaker,
+							 rule->destination))
+				return 0;
+	}
+
+	if (rule->flags & BUS_MATCH_PATH) {
+		const char *path;
+
+		WARN_ON(!rule->path);
+
+		path = message->path;
+		if (path == NULL)
+			return 0;
+
+		if (strcmp(path, rule->path) != 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Walk a list of rules and report whether any of them matches the
+ * message. A NULL list counts as "no match". The walk stops at the first
+ * matching rule.
+ */
+static bool get_recipients_from_list(struct bus_match_maker *matchmaker,
+				     struct bus_match_maker *sender,
+				     int eavesdrop,
+				     struct hlist_head *rules,
+				     const struct dbus_message *message)
+{
+	struct bus_match_rule *rule;
+	struct hlist_node *pos;
+
+	if (!rules) {
+		pr_debug("no rules of this type\n");
+		return false;
+	}
+
+	hlist_for_each_entry(rule, pos, rules, list) {
+		if (!match_rule_matches(matchmaker, sender, eavesdrop, rule,
+					message)) {
+			pr_debug("[NO]  deliver with match rule \"%s\"\n",
+				 rule->rule_text);
+			continue;
+		}
+
+		pr_debug("[YES] deliver with match rule \"%s\"\n",
+			 rule->rule_text);
+		return true;
+	}
+
+	pr_debug("[NO]  no match rules\n");
+	return false;
+}
+
+/*
+ * Return the list of rules registered for a message type and interface.
+ *
+ * With a NULL interface this is the pool's list of interface-less rules.
+ * Otherwise it is the list anchored at the tree entry for that interface,
+ * or a shared, always empty list when there is no such entry. The result
+ * is never NULL.
+ */
+static struct hlist_head
+*bus_matchmaker_get_rules(struct bus_match_maker *matchmaker,
+			  int message_type, const char *interface)
+{
+	static struct hlist_head empty = {0,};
+	struct bus_match_rule *anchor;
+	struct rule_pool *pool;
+
+	WARN_ON(message_type < 0);
+	WARN_ON(message_type >= DBUS_NUM_MESSAGE_TYPES);
+
+	pool = &matchmaker->rules_by_type[message_type];
+
+	if (!interface)
+		return &pool->rules_without_iface;
+
+	anchor = match_rule_search(&pool->rules_by_iface, interface);
+
+	return anchor ? &anchor->first : &empty;
+}
+
+/*
+ * Decide whether @matchmaker should receive @message.
+ *
+ * Up to four rule lists are consulted, in this order: rules with neither
+ * type nor interface, rules with only the message's interface, rules with
+ * only the message's type, and rules with both. If none of them matches,
+ * the result is whether message->destination is one of the names in
+ * @matchmaker's name tree.
+ */
+bool bus_matchmaker_filter(struct bus_match_maker *matchmaker,
+			   struct bus_match_maker *sender,
+			   int eavesdrop,
+			   const struct dbus_message *message)
+{
+	enum { NEITHER, JUST_IFACE, JUST_TYPE, BOTH, NUM_LISTS };
+	struct hlist_head *lists[NUM_LISTS] = { NULL, NULL, NULL, NULL };
+	const char *interface = message->interface;
+	int type = message->type;
+	int i;
+
+	lists[NEITHER] = bus_matchmaker_get_rules(matchmaker,
+						  DBUS_MESSAGE_TYPE_INVALID,
+						  NULL);
+
+	if (interface)
+		lists[JUST_IFACE] =
+			bus_matchmaker_get_rules(matchmaker,
+						 DBUS_MESSAGE_TYPE_INVALID,
+						 interface);
+
+	if (type > DBUS_MESSAGE_TYPE_INVALID && type < DBUS_NUM_MESSAGE_TYPES) {
+		lists[JUST_TYPE] = bus_matchmaker_get_rules(matchmaker, type,
+							    NULL);
+
+		if (interface)
+			lists[BOTH] = bus_matchmaker_get_rules(matchmaker,
+							       type,
+							       interface);
+	}
+
+	/* lists left at NULL are reported as "no match" by the callee */
+	for (i = 0; i < NUM_LISTS; i++) {
+		if (get_recipients_from_list(matchmaker, sender, eavesdrop,
+					     lists[i], message))
+			return 1;
+	}
+
+	return connection_is_primary_owner(matchmaker, message->destination);
+}
+
+/*
+ * Register a name on the matchmaker.
+ *
+ * The name is copied into a new entry of the matchmaker's name tree.
+ * Nothing happens if the name is already registered. Allocation failures
+ * are not reported to the caller; the tree is simply left unchanged.
+ */
+void bus_matchmaker_add_name(struct bus_match_maker *matchmaker,
+			     const char *name,
+			     gfp_t gfp_flags)
+{
+	struct dbus_name *dbus_name;
+	struct rb_node **new = &(matchmaker->names.rb_node), *parent = NULL;
+
+	/* Figure out where to put the new node before allocating anything,
+	 * so an already registered name costs nothing and leaks nothing.
+	 */
+	while (*new) {
+		struct dbus_name *this = container_of(*new, struct dbus_name,
+						      node);
+		int result = strcmp(name, this->name);
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else
+			return;
+	}
+
+	dbus_name = kmalloc(sizeof(struct dbus_name), gfp_flags);
+	if (!dbus_name)
+		return;
+
+	dbus_name->name = kstrdup(name, gfp_flags);
+	if (!dbus_name->name) {
+		/* don't leak the half-built entry */
+		kfree(dbus_name);
+		return;
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&dbus_name->node, parent, new);
+	rb_insert_color(&dbus_name->node, &matchmaker->names);
+}
+
+/*
+ * Unregister a name from the matchmaker and free its tree entry.
+ * Nothing happens if the name is not registered.
+ */
+void bus_matchmaker_remove_name(struct bus_match_maker *matchmaker,
+				const char *name)
+{
+	struct rb_node *node = matchmaker->names.rb_node;
+
+	while (node) {
+		struct dbus_name *data = container_of(node, struct dbus_name,
+						      node);
+		int result;
+
+		result = strcmp(name, data->name);
+
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else {
+			rb_erase(&data->node, &matchmaker->names);
+			kfree(data->name);
+			kfree(data);
+			/* names are unique in the tree and "node" now points
+			 * into freed memory, so the search must stop here
+			 */
+			return;
+		}
+	}
+}
+
diff --git a/net/netfilter/nfdbus/matchrule.h b/net/netfilter/nfdbus/matchrule.h
new file mode 100644
index 0000000..e16580c
--- /dev/null
+++ b/net/netfilter/nfdbus/matchrule.h
@@ -0,0 +1,82 @@
+/*
+ * matchrule.h  D-Bus match rule implementation (based on signals.h from dbus)
+ *
+ * Copyright (C) 2003  Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef BUS_SIGNALS_H
+#define BUS_SIGNALS_H
+
+#include <linux/gfp.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <net/af_bus.h>
+
+#include "message.h"
+
+struct bus_match_rule *bus_match_rule_new(gfp_t gfp_flags);
+void bus_match_rule_free(struct bus_match_rule *rule);
+
+struct bus_match_rule *bus_match_rule_parse(const char *rule_text,
+					    gfp_t gfp_flags);
+
+/* All match rules registered for one message type. */
+struct rule_pool {
+	/* Maps non-NULL interface names to a list of bus_match_rule.
+	 * One rule per interface is linked into this tree; the others hang
+	 * off that rule's "first" hlist head.
+	 */
+	struct rb_root rules_by_iface;
+
+	/* List of bus_match_rule which don't specify an interface */
+	struct hlist_head rules_without_iface;
+};
+
+/*
+ * Per-peer matching state: the match rules registered for the peer and
+ * the names recorded for it. Reference counted through kref;
+ * bus_matchmaker_free() is the release function.
+ */
+struct bus_match_maker {
+	/* NOTE(review): presumably the bus address of the peer - confirm */
+	struct sockaddr_bus addr;
+
+	/* NOTE(review): presumably links this object into a hash table kept
+	 * by the user of this structure - confirm
+	 */
+	struct hlist_node table_node;
+
+	/* Pools of rules, grouped by the type of message they match. 0
+	 * (DBUS_MESSAGE_TYPE_INVALID) represents rules that do not specify a
+	 * message type.
+	 */
+	struct rule_pool rules_by_type[DBUS_NUM_MESSAGE_TYPES];
+
+	/* rb tree of struct dbus_name, ordered by name */
+	struct rb_root names;
+
+	struct kref kref;
+};
+
+
+struct bus_match_maker *bus_matchmaker_new(gfp_t gfp_flags);
+void bus_matchmaker_free(struct kref *kref);
+
+int bus_matchmaker_add_rule(struct bus_match_maker *matchmaker,
+			    struct bus_match_rule *rule);
+void bus_matchmaker_remove_rule_by_value(struct bus_match_maker *matchmaker,
+					 struct bus_match_rule *value);
+
+bool bus_matchmaker_filter(struct bus_match_maker *matchmaker,
+			   struct bus_match_maker *sender,
+			   int eavesdrop,
+			   const struct dbus_message *message);
+
+void bus_matchmaker_add_name(struct bus_match_maker *matchmaker,
+			     const char *name, gfp_t gfp_flags);
+void bus_matchmaker_remove_name(struct bus_match_maker *matchmaker,
+				const char *name);
+
+#endif /* BUS_SIGNALS_H */
-- 
1.7.10

^ permalink raw reply related

* [PATCH net-next 13/15] netfilter: nfdbus: Add D-bus message parsing
From: Vincent Sanders @ 2012-06-29 16:45 UTC (permalink / raw)
  To: netdev, linux-kernel, David S. Miller
  Cc: Javier Martinez Canillas, Alban Crequy
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>

The netfilter D-Bus module needs to parse D-Bus messages sent by
applications to decide whether a peer may receive a given D-Bus
message. Add the D-Bus message parsing logic needed to analyze them.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
---
 net/netfilter/nfdbus/message.c |  194 ++++++++++++++++++++++++++++++++++++++++
 net/netfilter/nfdbus/message.h |   71 +++++++++++++++
 2 files changed, 265 insertions(+)
 create mode 100644 net/netfilter/nfdbus/message.c
 create mode 100644 net/netfilter/nfdbus/message.h

diff --git a/net/netfilter/nfdbus/message.c b/net/netfilter/nfdbus/message.c
new file mode 100644
index 0000000..93c409c
--- /dev/null
+++ b/net/netfilter/nfdbus/message.c
@@ -0,0 +1,194 @@
+/*
+ * message.c  Basic D-Bus message parsing
+ *
+ * Copyright (C) 2010-2012  Collabora Ltd
+ * Authors:	Alban Crequy <alban.crequy@collabora.co.uk>
+ * Copyright (C) 2002, 2003, 2004, 2005  Red Hat Inc.
+ * Copyright (C) 2002, 2003  CodeFactory AB
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <linux/slab.h>
+
+#include "message.h"
+
+/*
+ * Map a D-Bus message type name ("method_call", "method_return",
+ * "signal" or "error") to the matching DBUS_MESSAGE_TYPE_* constant.
+ *
+ * Returns DBUS_MESSAGE_TYPE_INVALID for any other string.
+ */
+int dbus_message_type_from_string(const char *type_str)
+{
+	if (!strcmp(type_str, "method_call"))
+		return DBUS_MESSAGE_TYPE_METHOD_CALL;
+
+	if (!strcmp(type_str, "method_return"))
+		return DBUS_MESSAGE_TYPE_METHOD_RETURN;
+
+	if (!strcmp(type_str, "signal"))
+		return DBUS_MESSAGE_TYPE_SIGNAL;
+
+	if (!strcmp(type_str, "error"))
+		return DBUS_MESSAGE_TYPE_ERROR;
+
+	return DBUS_MESSAGE_TYPE_INVALID;
+}
+
+/* Round @off up to the next multiple of @boundary (a power of two). */
+static size_t dbus_align(size_t off, size_t boundary)
+{
+	return (off + boundary - 1) & ~(boundary - 1);
+}
+
+/*
+ * Read the 32-bit value stored at byte offset @off of @message into @val.
+ *
+ * The value is copied out with memcpy() so that no alignment of @message
+ * is assumed, and it is interpreted in host byte order.
+ *
+ * Returns 0 on success, -EINVAL if the value does not fit in @len bytes.
+ */
+static int dbus_read_u32(const unsigned char *message, size_t len,
+			 size_t off, u32 *val)
+{
+	if (off > len || len - off < sizeof(*val))
+		return -EINVAL;
+
+	memcpy(val, message + off, sizeof(*val));
+	return 0;
+}
+
+/*
+ * Check that a string of @str_len bytes plus its terminating NUL lies
+ * entirely inside the @len bytes of @message, starting at offset @off.
+ *
+ * Returns 0 if so, -EINVAL otherwise.
+ */
+static int dbus_check_string(const unsigned char *message, size_t len,
+			     size_t off, size_t str_len)
+{
+	if (off > len || len - off <= str_len)
+		return -EINVAL;
+	if (message[off + str_len] != '\0')
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Parse one length-prefixed string argument of the body at offset *@off,
+ * store a pointer to it in *@arg and advance *@off past its NUL.
+ *
+ * Returns 0 on success, -EINVAL if the argument overruns the message.
+ */
+static int dbus_read_body_string(unsigned char *message, size_t len,
+				 size_t *off, char **arg)
+{
+	u32 str_len;
+
+	if (dbus_read_u32(message, len, *off, &str_len))
+		return -EINVAL;
+	*off += 4;
+
+	if (dbus_check_string(message, len, *off, str_len))
+		return -EINVAL;
+
+	*arg = (char *)message + *off;
+	*off += (size_t)str_len + 1;
+	return 0;
+}
+
+/*
+ * dbus_message_parse - locate the fields of a marshalled D-Bus message
+ * @message: buffer holding the complete message (header and body)
+ * @len: number of bytes in @message
+ * @dbus_message: receives pointers into @message and the parsed offsets
+ *
+ * Only the string-typed header fields and up to three leading string body
+ * arguments are recorded. Fields that do not occur in the message are not
+ * written, so the caller presumably has to pass a zeroed @dbus_message.
+ *
+ * The message is sent by an application and cannot be trusted: every read
+ * is checked against @len, and every recorded string is verified to be
+ * NUL terminated inside the buffer before it is handed to strcmp().
+ *
+ * NOTE(review): integers are read in host byte order; the endianness flag
+ * in message[0] is not examined -- confirm this is intended.
+ *
+ * Returns 0 on success, -EINVAL if the message is malformed or truncated.
+ */
+int dbus_message_parse(unsigned char *message, size_t len,
+		       struct dbus_message *dbus_message)
+{
+	const char *body_sig;
+	size_t off;
+	size_t fields_end;
+	u32 body_length;
+	u32 array_header_len;
+
+	dbus_message->message = (char *)message;
+
+	if (len < 4 + 4 + 4 + 4 || message[1] == 0 || message[1] > 4)
+		return -EINVAL;
+
+	dbus_message->type = message[1];
+
+	if (dbus_read_u32(message, len, 4, &body_length) ||
+	    dbus_read_u32(message, len, 12, &array_header_len))
+		return -EINVAL;
+
+	dbus_message->body_length = body_length;
+	dbus_message->len_offset = 12;
+
+	/* The header field array follows the fixed 16 byte header. */
+	off = 16;
+	if (array_header_len > len - off)
+		return -EINVAL;
+	fields_end = off + array_header_len;
+
+	while (off < fields_end) {
+		unsigned char header_code;
+		unsigned char signature;
+		u32 str_len;
+		char *str;
+
+		/* D-Bus alignment craziness */
+		off = dbus_align(off, 8);
+
+		/*
+		 * Field code, signature length, one signature character
+		 * and the NUL terminating the signature.
+		 */
+		if (off > len || len - off < 4)
+			return -EINVAL;
+
+		header_code = message[off];
+		/* All header fields of the current D-Bus spec have a simple
+		 * type, either o, s, g, or u */
+		if (message[off + 1] != 1)
+			return -EINVAL;
+		signature = message[off + 2];
+		off += 4;
+
+		if (signature != 'o' && signature != 's' &&
+		    signature != 'g' && signature != 'u')
+			return -EINVAL;
+
+		if (signature == 'u') {
+			/* Nothing to record, just skip the 32-bit value. */
+			if (len - off < 4)
+				return -EINVAL;
+			off += 4;
+			continue;
+		}
+
+		if (signature != 'g') {
+			/* 'o' and 's' values carry a 32-bit length. */
+			if (dbus_read_u32(message, len, off, &str_len))
+				return -EINVAL;
+			off += 4;
+		} else {
+			/* 'g' values carry a single, unsigned length byte. */
+			if (off >= len)
+				return -EINVAL;
+			str_len = message[off];
+			off += 1;
+		}
+
+		if (dbus_check_string(message, len, off, str_len))
+			return -EINVAL;
+
+		str = (char *)message + off;
+		switch (header_code) {
+		case 1:
+			dbus_message->path = str;
+			break;
+		case 2:
+			dbus_message->interface = str;
+			break;
+		case 3:
+			dbus_message->member = str;
+			break;
+		case 6:
+			dbus_message->destination = str;
+			break;
+		case 7:
+			dbus_message->sender = str;
+			break;
+		case 8:
+			dbus_message->body_signature = str;
+			break;
+		default:
+			break;
+		}
+		off += (size_t)str_len + 1;
+	}
+
+	dbus_message->padding_end = (8 - off % 8) % 8;
+
+	/* Jump to body D-Bus alignment craziness */
+	off = dbus_align(off, 8);
+	dbus_message->new_header_offset = off;
+
+	if (off > len || len - off != body_length) {
+		pr_warn("Message truncated? Header %zu + Body %u != Length %zu\n",
+			off, body_length, len);
+		return -EINVAL;
+	}
+
+	/*
+	 * Record up to three leading string arguments. body_sig was checked
+	 * to be NUL terminated, so looking one character past an 's' is safe.
+	 */
+	body_sig = dbus_message->body_signature;
+	if (body_sig && body_sig[0] == 's') {
+		if (dbus_read_body_string(message, len, &off,
+					  &dbus_message->arg0))
+			return -EINVAL;
+		off = dbus_align(off, 4);
+
+		if (body_sig[1] == 's') {
+			if (dbus_read_body_string(message, len, &off,
+						  &dbus_message->arg1))
+				return -EINVAL;
+			off = dbus_align(off, 4);
+
+			if (body_sig[2] == 's' &&
+			    dbus_read_body_string(message, len, &off,
+						  &dbus_message->arg2))
+				return -EINVAL;
+		}
+	}
+
+	if (dbus_message->type == DBUS_MESSAGE_TYPE_SIGNAL &&
+	    dbus_message->sender && dbus_message->path &&
+	    dbus_message->interface && dbus_message->member &&
+	    dbus_message->arg0 &&
+	    strcmp(dbus_message->sender, "org.freedesktop.DBus") == 0 &&
+	    strcmp(dbus_message->interface, "org.freedesktop.DBus") == 0 &&
+	    strcmp(dbus_message->path, "/org/freedesktop/DBus") == 0) {
+		if (strcmp(dbus_message->member, "NameAcquired") == 0)
+			dbus_message->name_acquired = dbus_message->arg0;
+		else if (strcmp(dbus_message->member, "NameLost") == 0)
+			dbus_message->name_lost = dbus_message->arg0;
+	}
+
+	return 0;
+}
diff --git a/net/netfilter/nfdbus/message.h b/net/netfilter/nfdbus/message.h
new file mode 100644
index 0000000..e3ea4d3
--- /dev/null
+++ b/net/netfilter/nfdbus/message.h
@@ -0,0 +1,71 @@
+/*
+ * message.h  Basic D-Bus message parsing
+ *
+ * Copyright (C) 2010  Collabora Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef DBUS_MESSAGE_H
+#define DBUS_MESSAGE_H
+
+#include <linux/list.h>
+
+#define DBUS_MAXIMUM_MATCH_RULE_LENGTH 1024
+
+/* Types of message */
+
+#define DBUS_MESSAGE_TYPE_INVALID       0
+#define DBUS_MESSAGE_TYPE_METHOD_CALL   1
+#define DBUS_MESSAGE_TYPE_METHOD_RETURN 2
+#define DBUS_MESSAGE_TYPE_ERROR         3
+#define DBUS_MESSAGE_TYPE_SIGNAL        4
+#define DBUS_NUM_MESSAGE_TYPES          5
+
+/* No need to implement a feature-complete parser. It only implements what
+ * is needed by the bus. */
+struct dbus_message {
+	char *message;		/* start of the raw message buffer */
+	size_t len;		/* not set by dbus_message_parse() */
+	size_t new_len;		/* not set by dbus_message_parse() */
+
+	/* direct pointers to the fields */
+	int type;		/* DBUS_MESSAGE_TYPE_*, taken from message[1] */
+	char *path;		/* header field code 1 */
+	char *interface;	/* header field code 2 */
+	char *member;		/* header field code 3 */
+	char *destination;	/* header field code 6 */
+	char *sender;		/* header field code 7 */
+	char *body_signature;	/* header field code 8 */
+	int body_length;	/* read from byte offset 4 of the header */
+	char *arg0;		/* 1st body argument, if signature starts "s" */
+	char *arg1;		/* 2nd body argument, if signature starts "ss" */
+	char *arg2;		/* 3rd body argument, if signature starts "sss" */
+	char *name_acquired;	/* arg0 of a bus "NameAcquired" signal */
+	char *name_lost;	/* arg0 of a bus "NameLost" signal */
+
+	/* How to add the 'sender' field in the headers */
+	int new_header_offset;	/* 8-byte aligned offset where the body starts */
+	int len_offset;		/* offset of the header field array length */
+	int padding_end;	/* pad bytes between header fields and body */
+};
+
+int dbus_message_type_from_string(const char *type_str);
+
+int dbus_message_parse(unsigned char *message, size_t len,
+		       struct dbus_message *dbus_message);
+
+#endif /* DBUS_MESSAGE_H */
-- 
1.7.10

^ permalink raw reply related

* [PATCH net-next 12/15] netlink: connector: Add idx and val identifiers for netfilter D-Bus
From: Vincent Sanders @ 2012-06-29 16:45 UTC (permalink / raw)
  To: netdev, linux-kernel, David S. Miller
  Cc: Javier Martinez Canillas, Alban Crequy
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>

The D-Bus IPC system implements a transport that uses AF_BUS sockets to
send D-Bus messages to the peers. This allows decoupling the routing
logic from the daemon and moving it to the kernel, which has the
advantage of reducing the number of context switches and the number of
messages copied to user-space.

A D-Bus protocol aware netfilter module decides which peers can receive
a given message based on a set of D-Bus match rules. These match rules
are set from user-space using the netlink connector API.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
---
 include/linux/connector.h |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/connector.h b/include/linux/connector.h
index c27be60..519d010 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -44,8 +44,10 @@
 #define CN_VAL_DRBD			0x1
 #define CN_KVP_IDX			0x9	/* HyperV KVP */
 #define CN_KVP_VAL			0x1	/* queries from the kernel */
+#define CN_IDX_NFDBUS                   0xA     /* netfilter D-Bus */
+#define CN_VAL_NFDBUS                   0x1

-#define CN_NETLINK_USERS		10	/* Highest index + 1 */
+#define CN_NETLINK_USERS		11	/* Highest index + 1 */

 /*
  * Maximum connector's message size.
-- 
1.7.10

^ permalink raw reply related

* [PATCH net-next 11/15] netlink: connector: implement cn_netlink_reply
From: Vincent Sanders @ 2012-06-29 16:45 UTC (permalink / raw)
  To: netdev, linux-kernel, David S. Miller; +Cc: Alban Crequy
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

From: Alban Crequy <alban.crequy@collabora.co.uk>

In a connector callback, it was not possible to reply only to the sender
of a message. This patch implements cn_netlink_reply(). It uses the
connector socket to send a unicast netlink message back to the sender.

The following pseudo-code can be used from a connector callback:

        struct cn_msg *cn_reply;
        cn_reply = kzalloc(sizeof(struct cn_msg)
                + sizeof(struct ..._nl_cfg_reply), GFP_KERNEL);

        cn_reply->id = msg->id;
        cn_reply->seq = msg->seq;
        cn_reply->ack = msg->ack  + 1;
        cn_reply->len = sizeof(struct ..._nl_cfg_reply);
        cn_reply->flags = 0;

        rr = cn_netlink_reply(cn_reply, nsp->pid, GFP_KERNEL);

Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
---
 drivers/connector/connector.c |   32 ++++++++++++++++++++++++++++++++
 include/linux/connector.h     |    1 +
 2 files changed, 33 insertions(+)

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 34e0e9e..a728d33 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -118,6 +118,38 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask)
 EXPORT_SYMBOL_GPL(cn_netlink_send);
 
 /*
+ * Send a unicast reply to the sender from a connector callback
+ *
+ */
+int cn_netlink_reply(struct cn_msg *msg, u32 pid, gfp_t gfp_mask)
+{
+	/* The connector header is followed by msg->len bytes of data. */
+	const unsigned int payload = sizeof(*msg) + msg->len;
+	const unsigned int size = NLMSG_SPACE(payload);
+	struct nlmsghdr *nlh;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(size, gfp_mask);
+	if (!skb)
+		return -ENOMEM;
+
+	nlh = nlmsg_put(skb, 0, msg->seq, NLMSG_DONE, size - sizeof(*nlh), 0);
+	if (!nlh) {
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	memcpy(nlmsg_data(nlh), msg, payload);
+
+	/* Deliver to @pid only; the final argument asks not to block. */
+	return netlink_unicast(cdev.nls, skb, pid, 1);
+}
+EXPORT_SYMBOL_GPL(cn_netlink_reply);
+
+/*
  * Callback helper - queues work and setup destructor for given data.
  */
 static int cn_call_callback(struct sk_buff *skb)
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 7638407..c27be60 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -125,6 +125,7 @@ int cn_add_callback(struct cb_id *id, const char *name,
 		    void (*callback)(struct cn_msg *, struct netlink_skb_parms *));
 void cn_del_callback(struct cb_id *);
 int cn_netlink_send(struct cn_msg *, u32, gfp_t);
+int cn_netlink_reply(struct cn_msg *, u32, gfp_t);
 
 int cn_queue_add_callback(struct cn_queue_dev *dev, const char *name,
 			  struct cb_id *id,
-- 
1.7.10

^ permalink raw reply related

* [PATCH net-next 10/15] net: bus: Add the AF_BUS socket address family to KBuild
From: Vincent Sanders @ 2012-06-29 16:45 UTC (permalink / raw)
  To: netdev, linux-kernel, David S. Miller
  Cc: Javier Martinez Canillas, Vincent Sanders
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>

This patch adds the AF_BUS code to the Linux Kernel build system.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Vincent Sanders <vincent.sanders@collabora.co.uk>
---
 net/Kconfig      |    1 +
 net/Makefile     |    1 +
 net/bus/Kconfig  |   15 +++++++++++++++
 net/bus/Makefile |    7 +++++++
 4 files changed, 24 insertions(+)
 create mode 100644 net/bus/Kconfig
 create mode 100644 net/bus/Makefile

diff --git a/net/Kconfig b/net/Kconfig
index 245831b..339a630 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -47,6 +47,7 @@ menu "Networking options"
 
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
+source "net/bus/Kconfig"
 source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 
diff --git a/net/Makefile b/net/Makefile
index 4f4ee08..ad0e900 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_NETFILTER)		+= netfilter/
 obj-$(CONFIG_INET)		+= ipv4/
 obj-$(CONFIG_XFRM)		+= xfrm/
 obj-$(CONFIG_UNIX)		+= unix/
+obj-$(CONFIG_AF_BUS)		+= bus/
 obj-$(CONFIG_NET)		+= ipv6/
 obj-$(CONFIG_PACKET)		+= packet/
 obj-$(CONFIG_NET_KEY)		+= key/
diff --git a/net/bus/Kconfig b/net/bus/Kconfig
new file mode 100644
index 0000000..5f01410
--- /dev/null
+++ b/net/bus/Kconfig
@@ -0,0 +1,15 @@
+#
+# Bus Domain Sockets
+#
+
+config AF_BUS
+	tristate "Bus domain sockets (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  If you say Y here, you will include support for Bus domain sockets.
+	  These sockets are used to create communication buses for IPC.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called af-bus.
+
+	  Say N unless you know what you are doing.
diff --git a/net/bus/Makefile b/net/bus/Makefile
new file mode 100644
index 0000000..8c1fea2
--- /dev/null
+++ b/net/bus/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux bus domain socket layer.
+#
+
+obj-$(CONFIG_AF_BUS)	+= af-bus.o
+
+af-bus-y		:= af_bus.o garbage.o
-- 
1.7.10

^ permalink raw reply related

* [PATCH net-next 09/15] net: bus: Add garbage collector for AF_BUS sockets.
From: Vincent Sanders @ 2012-06-29 16:45 UTC (permalink / raw)
  To: netdev, linux-kernel, David S. Miller
  Cc: Javier Martinez Canillas, Vincent Sanders
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>

This patch adds a garbage collector for AF_BUS sockets.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Vincent Sanders <vincent.sanders@collabora.co.uk>
---
 net/bus/garbage.c |  322 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 322 insertions(+)
 create mode 100644 net/bus/garbage.c

diff --git a/net/bus/garbage.c b/net/bus/garbage.c
new file mode 100644
index 0000000..2435f38
--- /dev/null
+++ b/net/bus/garbage.c
@@ -0,0 +1,322 @@
+/*
+ * Garbage Collector For AF_BUS sockets
+ *
+ * Based on Garbage Collector For AF_UNIX sockets (net/unix/garbage.c).
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/net.h>
+#include <linux/fs.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+
+#include <net/sock.h>
+#include <net/af_bus.h>
+#include <net/scm.h>
+#include <net/tcp_states.h>
+
+/* Internal data structures and random procedures: */
+
+static LIST_HEAD(gc_inflight_list);
+static LIST_HEAD(gc_candidates);
+static DEFINE_SPINLOCK(bus_gc_lock);
+static DECLARE_WAIT_QUEUE_HEAD(bus_gc_wait);
+
+unsigned int bus_tot_inflight;
+
+
+/*
+ * Return the struct sock behind @filp when it refers to a PF_BUS socket,
+ * NULL otherwise. No reference is taken on the returned sock.
+ */
+struct sock *bus_get_socket(struct file *filp)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct socket *sock;
+	struct sock *sk;
+
+	/* Must be a socket inode that was not opened with FMODE_PATH. */
+	if (!S_ISSOCK(inode->i_mode) || (filp->f_mode & FMODE_PATH))
+		return NULL;
+
+	sock = SOCKET_I(inode);
+	sk = sock->sk;
+
+	/* Only sockets of the PF_BUS family are of interest. */
+	if (!sk || !sock->ops || sock->ops->family != PF_BUS)
+		return NULL;
+
+	return sk;
+}
+
+/*
+ *	Record one more in-flight reference for the file if it is an
+ *	AF_BUS socket. The socket is put on gc_inflight_list when its
+ *	in-flight count goes from zero to one.
+ */
+
+void bus_inflight(struct file *fp)
+{
+	struct sock *s = bus_get_socket(fp);
+	struct bus_sock *u;
+
+	if (!s)
+		return;
+
+	u = bus_sk(s);
+
+	spin_lock(&bus_gc_lock);
+	if (atomic_long_inc_return(&u->inflight) == 1) {
+		/* First reference: the socket must not be listed yet. */
+		BUG_ON(!list_empty(&u->link));
+		list_add_tail(&u->link, &gc_inflight_list);
+	} else {
+		BUG_ON(list_empty(&u->link));
+	}
+	bus_tot_inflight++;
+	spin_unlock(&bus_gc_lock);
+}
+
+/*
+ *	Drop one in-flight reference for the file if it is an AF_BUS
+ *	socket, unlinking the socket from its list once the count
+ *	reaches zero.
+ */
+void bus_notinflight(struct file *fp)
+{
+	struct sock *s = bus_get_socket(fp);
+	struct bus_sock *u;
+
+	if (!s)
+		return;
+
+	u = bus_sk(s);
+
+	spin_lock(&bus_gc_lock);
+	BUG_ON(list_empty(&u->link));
+	if (atomic_long_dec_and_test(&u->inflight))
+		list_del_init(&u->link);
+	bus_tot_inflight--;
+	spin_unlock(&bus_gc_lock);
+}
+
+/*
+ * Walk the receive queue of @x and call @func on every AF_BUS socket that
+ * is passed as a file descriptor in a queued skb and is currently marked
+ * as a garbage collection candidate.
+ *
+ * When @hitlist is not NULL, every skb that carried at least one such
+ * candidate is moved from the receive queue of @x to @hitlist.
+ *
+ * The receive queue lock of @x is held for the whole walk.
+ */
+static void scan_inflight(struct sock *x, void (*func)(struct bus_sock *),
+			  struct sk_buff_head *hitlist)
+{
+	struct sk_buff *skb;
+	struct sk_buff *next;
+
+	spin_lock(&x->sk_receive_queue.lock);
+	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
+		/*
+		 *	Do we have file descriptors ?
+		 */
+		if (BUSCB(skb).fp) {
+			bool hit = false;
+			/*
+			 *	Process the descriptors of this socket
+			 */
+			int nfd = BUSCB(skb).fp->count;
+			struct file **fp = BUSCB(skb).fp->fp;
+			while (nfd--) {
+				/*
+				 *	Get the socket the fd matches
+				 *	if it indeed does so
+				 */
+				struct sock *sk = bus_get_socket(*fp++);
+				if (sk) {
+					struct bus_sock *u = bus_sk(sk);
+
+					/*
+					 * Ignore non-candidates, they could
+					 * have been added to the queues after
+					 * starting the garbage collection
+					 */
+					if (u->gc_candidate) {
+						hit = true;
+						func(u);
+					}
+				}
+			}
+			/* Unlinking is safe: the walk saved the next skb. */
+			if (hit && hitlist != NULL) {
+				__skb_unlink(skb, &x->sk_receive_queue);
+				__skb_queue_tail(hitlist, skb);
+			}
+		}
+	}
+	spin_unlock(&x->sk_receive_queue.lock);
+}
+
+/*
+ * Run scan_inflight() on @x, or, when @x is in the TCP_LISTEN state, on
+ * every socket that owns an skb queued on the receive queue of @x (the
+ * embryos). @func and @hitlist are passed through to scan_inflight().
+ */
+static void scan_children(struct sock *x, void (*func)(struct bus_sock *),
+			  struct sk_buff_head *hitlist)
+{
+	if (x->sk_state != TCP_LISTEN)
+		scan_inflight(x, func, hitlist);
+	else {
+		struct sk_buff *skb;
+		struct sk_buff *next;
+		struct bus_sock *u;
+		LIST_HEAD(embryos);
+
+		/*
+		 * For a listening socket collect the queued embryos
+		 * and perform a scan on them as well.
+		 */
+		spin_lock(&x->sk_receive_queue.lock);
+		skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
+			u = bus_sk(skb->sk);
+
+			/*
+			 * An embryo cannot be in-flight, so it's safe
+			 * to use the list link.
+			 */
+			BUG_ON(!list_empty(&u->link));
+			list_add_tail(&u->link, &embryos);
+		}
+		spin_unlock(&x->sk_receive_queue.lock);
+
+		/* Scan with the listener's queue lock already dropped. */
+		while (!list_empty(&embryos)) {
+			u = list_entry(embryos.next, struct bus_sock, link);
+			scan_inflight(&u->sk, func, hitlist);
+			list_del_init(&u->link);
+		}
+	}
+}
+
+/* scan_children() callback: drop one in-flight reference of @usk. */
+static void dec_inflight(struct bus_sock *usk)
+{
+	atomic_long_dec(&usk->inflight);
+}
+
+/* scan_children() callback: restore one in-flight reference of @usk. */
+static void inc_inflight(struct bus_sock *usk)
+{
+	atomic_long_inc(&usk->inflight);
+}
+
+/*
+ * scan_children() callback: restore one in-flight reference of @u and,
+ * while it is still flagged gc_maybe_cycle, requeue it at the tail of
+ * gc_candidates.
+ */
+static void inc_inflight_move_tail(struct bus_sock *u)
+{
+	atomic_long_inc(&u->inflight);
+	/*
+	 * If this still might be part of a cycle, move it to the end
+	 * of the list, so that it's checked even if it was already
+	 * passed over
+	 */
+	if (u->gc_maybe_cycle)
+		list_move_tail(&u->link, &gc_candidates);
+}
+
+/* True while bus_gc() is running; bus_gc() changes it under bus_gc_lock. */
+static bool gc_in_progress = false;
+/* In-flight total above which wait_for_bus_gc() starts a collection. */
+#define BUS_INFLIGHT_TRIGGER_GC 16000
+
+/*
+ * Sleep until no garbage collection is in progress, running one first
+ * when too many sockets are in flight and none is running yet.
+ */
+void wait_for_bus_gc(void)
+{
+	/*
+	 * If number of inflight sockets is insane,
+	 * force a garbage collect right now.
+	 */
+	if (bus_tot_inflight > BUS_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+		bus_gc();
+	wait_event(bus_gc_wait, gc_in_progress == false);
+}
+
+/*
+ * The external entry point: bus_gc()
+ *
+ * Find AF_BUS sockets that are only referenced by file descriptors queued
+ * on other such sockets (reference cycles) and purge the skbs that keep
+ * them alive. Returns immediately if a collection is already running;
+ * otherwise waiters on bus_gc_wait are woken once the collection is done.
+ */
+void bus_gc(void)
+{
+	struct bus_sock *u;
+	struct bus_sock *next;
+	struct sk_buff_head hitlist;
+	struct list_head cursor;
+	LIST_HEAD(not_cycle_list);
+
+	spin_lock(&bus_gc_lock);
+
+	/* Avoid a recursive GC. */
+	if (gc_in_progress)
+		goto out;
+
+	gc_in_progress = true;
+	/*
+	 * First, select candidates for garbage collection.  Only
+	 * in-flight sockets are considered, and from those only ones
+	 * which don't have any external reference.
+	 *
+	 * Holding bus_gc_lock will protect these candidates from
+	 * being detached, and hence from gaining an external
+	 * reference.  Since there are no possible receivers, all
+	 * buffers currently on the candidates' queues stay there
+	 * during the garbage collection.
+	 *
+	 * We also know that no new candidate can be added onto the
+	 * receive queues.  Other, non candidate sockets _can_ be
+	 * added to queue, so we must make sure only to touch
+	 * candidates.
+	 */
+	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+		long total_refs;
+		long inflight_refs;
+
+		total_refs = file_count(u->sk.sk_socket->file);
+		inflight_refs = atomic_long_read(&u->inflight);
+
+		BUG_ON(inflight_refs < 1);
+		BUG_ON(total_refs < inflight_refs);
+		if (total_refs == inflight_refs) {
+			list_move_tail(&u->link, &gc_candidates);
+			u->gc_candidate = 1;
+			u->gc_maybe_cycle = 1;
+		}
+	}
+
+	/*
+	 * Now remove all internal in-flight reference to children of
+	 * the candidates.
+	 */
+	list_for_each_entry(u, &gc_candidates, link)
+		scan_children(&u->sk, dec_inflight, NULL);
+
+	/*
+	 * Restore the references for children of all candidates,
+	 * which have remaining references.  Do this recursively, so
+	 * only those remain, which form cyclic references.
+	 *
+	 * Use a "cursor" link, to make the list traversal safe, even
+	 * though elements might be moved about.
+	 */
+	list_add(&cursor, &gc_candidates);
+	while (cursor.next != &gc_candidates) {
+		u = list_entry(cursor.next, struct bus_sock, link);
+
+		/* Move cursor to after the current position. */
+		list_move(&cursor, &u->link);
+
+		if (atomic_long_read(&u->inflight) > 0) {
+			list_move_tail(&u->link, &not_cycle_list);
+			u->gc_maybe_cycle = 0;
+			scan_children(&u->sk, inc_inflight_move_tail, NULL);
+		}
+	}
+	list_del(&cursor);
+
+	/*
+	 * not_cycle_list contains those sockets which do not make up a
+	 * cycle.  Restore these to the inflight list.
+	 */
+	while (!list_empty(&not_cycle_list)) {
+		u = list_entry(not_cycle_list.next, struct bus_sock, link);
+		u->gc_candidate = 0;
+		list_move_tail(&u->link, &gc_inflight_list);
+	}
+
+	/*
+	 * Now gc_candidates contains only garbage.  Restore original
+	 * inflight counters for these as well, and remove the skbuffs
+	 * which are creating the cycle(s).
+	 */
+	skb_queue_head_init(&hitlist);
+	list_for_each_entry(u, &gc_candidates, link)
+		scan_children(&u->sk, inc_inflight, &hitlist);
+
+	/* The purge runs without bus_gc_lock held. */
+	spin_unlock(&bus_gc_lock);
+
+	/* Here we are. Hitlist is filled. Die. */
+	__skb_queue_purge(&hitlist);
+
+	spin_lock(&bus_gc_lock);
+
+	/* All candidates should have been detached by now. */
+	BUG_ON(!list_empty(&gc_candidates));
+	gc_in_progress = false;
+	wake_up(&bus_gc_wait);
+
+ out:
+	spin_unlock(&bus_gc_lock);
+}
-- 
1.7.10

^ permalink raw reply related

* [PATCH net-next 08/15] net: bus: Add implementation of Bus domain sockets
From: Vincent Sanders @ 2012-06-29 16:45 UTC (permalink / raw)
  To: netdev, linux-kernel, David S. Miller
  Cc: Javier Martinez Canillas, Vincent Sanders
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>

This is the core implementation of the AF_BUS socket family. Its
design and operation are fully covered in
Documentation/networking/af_bus.txt

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Vincent Sanders <vincent.sanders@collabora.co.uk>
---
 net/bus/af_bus.c | 2629 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 2629 insertions(+)
 create mode 100644 net/bus/af_bus.c

diff --git a/net/bus/af_bus.c b/net/bus/af_bus.c
new file mode 100644
index 0000000..0b79754
--- /dev/null
+++ b/net/bus/af_bus.c
@@ -0,0 +1,2629 @@
+/*
+ * Implementation of Bus domain sockets.
+ *
+ * Copyright (c) 2012, GENIVI Alliance
+ *
+ * Authors:	Javier Martinez Canillas <javier.martinez@collabora.co.uk>
+ *              Alban Crequy <alban.crequy@collabora.co.uk>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Based on BSD Unix domain sockets (net/unix).
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/socket.h>
+#include <linux/bus.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/af_bus.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/scm.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/rtnetlink.h>
+#include <linux/mount.h>
+#include <net/checksum.h>
+#include <linux/security.h>
+
+struct hlist_head bus_socket_table[BUS_HASH_SIZE + 1];
+EXPORT_SYMBOL_GPL(bus_socket_table);
+struct hlist_head bus_address_table[BUS_HASH_SIZE];
+EXPORT_SYMBOL_GPL(bus_address_table);
+DEFINE_SPINLOCK(bus_table_lock);
+DEFINE_SPINLOCK(bus_address_lock);
+EXPORT_SYMBOL_GPL(bus_address_lock);
+static atomic_long_t bus_nr_socks;
+
+#define bus_sockets_unbound	(&bus_socket_table[BUS_HASH_SIZE])
+
+#define BUS_ABSTRACT(sk)	(bus_sk(sk)->addr->hash != BUS_HASH_SIZE)
+
+#ifdef CONFIG_SECURITY_NETWORK
+/* Store the security ID of the scm cookie in the skb. */
+static inline void bus_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	memcpy(BUSSID(skb), &scm->secid, sizeof(u32));
+}
+
+/* Load the security ID stored in the skb into the scm cookie. */
+static inline void bus_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	scm->secid = *BUSSID(skb);
+}
+#else
+/* Without CONFIG_SECURITY_NETWORK there is no security ID to carry. */
+static inline void bus_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
+{ }
+
+static inline void bus_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
+{ }
+#endif /* CONFIG_SECURITY_NETWORK */
+
+/*
+ *  SMP locking strategy:
+ *    bus_socket_table hash table is protected with spinlock bus_table_lock
+ *    bus_address_table hash table is protected with spinlock bus_address_lock
+ *    each bus is protected by a separate spin lock.
+ *    multicast atomic sending is protected by a separate spin lock.
+ *    each socket state is protected by a separate spin lock.
+ *    each socket address is protected by a separate spin lock.
+ *
+ *  When holding more than one lock, use the following hierarchy:
+ *  - bus_table_lock.
+ *  - bus_address_lock.
+ *  - socket lock.
+ *  - bus lock.
+ *  - bus send_lock.
+ *  - sock address lock.
+ */
+
+#define bus_peer(sk) (bus_sk(sk)->peer)
+
+/* Return non-zero when the peer of @osk is @sk. */
+static inline int bus_our_peer(struct sock *sk, struct sock *osk)
+{
+	return bus_peer(osk) == sk;
+}
+
+/*
+ * Return non-zero when more skbs are queued on the receive queue of @sk
+ * than its sk_max_ack_backlog allows.
+ */
+static inline int bus_recvq_full(struct sock const *sk)
+{
+	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
+}
+
+/*
+ * Extract the prefix of a bus address: the bits selected by
+ * BUS_PREFIX_MASK, shifted down by BUS_CLIENT_BITS.
+ */
+static inline u16 bus_addr_prefix(struct sockaddr_bus *busaddr)
+{
+	return (busaddr->sbus_addr.s_addr & BUS_PREFIX_MASK) >> BUS_CLIENT_BITS;
+}
+
+/* Extract the client part of a bus address (bits in BUS_CLIENT_MASK). */
+static inline u64 bus_addr_client(struct sockaddr_bus *sbusaddr)
+{
+	return sbusaddr->sbus_addr.s_addr & BUS_CLIENT_MASK;
+}
+
+/*
+ * Return true when every client bit of the address is set.
+ * NOTE(review): presumably this marks a multicast address -- confirm.
+ */
+static inline bool bus_mc_addr(struct sockaddr_bus *sbusaddr)
+{
+	return bus_addr_client(sbusaddr) == BUS_CLIENT_MASK;
+}
+
+/*
+ * Return the peer of @s with a reference taken by sock_hold(), or NULL
+ * when @s has no peer. The peer is looked up under the state lock of @s.
+ */
+struct sock *bus_peer_get(struct sock *s)
+{
+	struct sock *peer;
+
+	bus_state_lock(s);
+	peer = bus_peer(s);
+	if (peer)
+		sock_hold(peer);
+	bus_state_unlock(s);
+	return peer;
+}
+EXPORT_SYMBOL_GPL(bus_peer_get);
+
+/* Drop one reference on @addr and free it when that was the last one. */
+static inline void bus_release_addr(struct bus_address *addr)
+{
+	if (atomic_dec_and_test(&addr->refcnt))
+		kfree(addr);
+}
+
+/*
+ *	Check bus socket name:
+ *		- should be not zero length.
+ *		- if started by not zero, should be NUL terminated (FS object)
+ *		- if started by zero, it is abstract name.
+ *
+ *	On success the hash of the bus address is stored in *hashp and
+ *	the length of the address is returned: the path length plus its
+ *	terminator, the family field and the struct bus_addr.
+ *	Returns -EINVAL for a NULL address, a bad length or a family
+ *	other than AF_BUS.
+ */
+
+static int bus_mkname(struct sockaddr_bus *sbusaddr, int len,
+		      unsigned int *hashp)
+{
+	int offset;
+
+	/* Validate the pointer before anything is read through it. */
+	if (!sbusaddr)
+		return -EINVAL;
+	if (len <= sizeof(short) || len > sizeof(*sbusaddr))
+		return -EINVAL;
+	if (sbusaddr->sbus_family != AF_BUS)
+		return -EINVAL;
+
+	/* Abstract names start with a NUL byte, which is skipped. */
+	offset = (sbusaddr->sbus_path[0] == '\0');
+
+	/*
+	 * NOTE(review): with offset == 1 the scan may look one byte past
+	 * BUS_PATH_MAX bytes of sbus_path -- confirm the array size.
+	 */
+	len = strnlen(sbusaddr->sbus_path + offset, BUS_PATH_MAX) + 1 +
+		sizeof(__kernel_sa_family_t) +
+		sizeof(struct bus_addr);
+
+	*hashp = bus_compute_hash(sbusaddr->sbus_addr);
+	return len;
+}
+
+/* Unlink @addr from its hash chain; does no locking of its own. */
+static void __bus_remove_address(struct bus_address *addr)
+{
+	hlist_del(&addr->table_node);
+}
+
+/* Add @addr at the head of chain @list; does no locking of its own. */
+static void __bus_insert_address(struct hlist_head *list,
+				 struct bus_address *addr)
+{
+	hlist_add_head(&addr->table_node, list);
+}
+
+/* Locked variant: unlink @addr while holding bus_address_lock. */
+static inline void bus_remove_address(struct bus_address *addr)
+{
+	spin_lock(&bus_address_lock);
+	__bus_remove_address(addr);
+	spin_unlock(&bus_address_lock);
+}
+
+/* Locked variant: add @addr to @list while holding bus_address_lock. */
+static inline void bus_insert_address(struct hlist_head *list,
+				      struct bus_address *addr)
+{
+	spin_lock(&bus_address_lock);
+	__bus_insert_address(list, addr);
+	spin_unlock(&bus_address_lock);
+}
+
+/* Unhash @sk from its chain; does no locking of its own. */
+static void __bus_remove_socket(struct sock *sk)
+{
+	sk_del_node_init(sk);
+}
+
+/*
+ * Add @sk to chain @list, warning if it is already hashed; does no
+ * locking of its own.
+ */
+static void __bus_insert_socket(struct hlist_head *list, struct sock *sk)
+{
+	WARN_ON(!sk_unhashed(sk));
+	sk_add_node(sk, list);
+}
+
+/* Locked variant: unhash @sk while holding bus_table_lock. */
+static inline void bus_remove_socket(struct sock *sk)
+{
+	spin_lock(&bus_table_lock);
+	__bus_remove_socket(sk);
+	spin_unlock(&bus_table_lock);
+}
+
+/* Locked variant: add @sk to @list while holding bus_table_lock. */
+static inline void bus_insert_socket(struct hlist_head *list, struct sock *sk)
+{
+	spin_lock(&bus_table_lock);
+	__bus_insert_socket(list, sk);
+	spin_unlock(&bus_table_lock);
+}
+
+/*
+ * Return true when at least one address on the address list of @sk has
+ * the given @prefix. Does no locking of its own.
+ */
+static inline bool __bus_has_prefix(struct sock *sk, u16 prefix)
+{
+	struct bus_sock *u = bus_sk(sk);
+	struct bus_address *addr;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(addr, node, &u->addr_list, addr_node) {
+		/* One match settles the answer. */
+		if (bus_addr_prefix(addr->name) == prefix)
+			return true;
+	}
+
+	return false;
+}
+
+/* Locked variant of __bus_has_prefix(): takes the state lock of @sk. */
+static inline bool bus_has_prefix(struct sock *sk, u16 prefix)
+{
+	bool ret;
+
+	bus_state_lock(sk);
+	ret = __bus_has_prefix(sk, prefix);
+	bus_state_unlock(sk);
+
+	return ret;
+}
+
+/*
+ * Return the eavesdropper flag of @sk. @condition is not used.
+ * Does no locking of its own.
+ */
+static inline bool __bus_eavesdropper(struct sock *sk, u16 condition)
+{
+	struct bus_sock *u = bus_sk(sk);
+
+	return u->eavesdropper;
+}
+
+/* Locked variant of __bus_eavesdropper(): takes the state lock of @sk. */
+static inline bool bus_eavesdropper(struct sock *sk, u16 condition)
+{
+	bool ret;
+
+	bus_state_lock(sk);
+	ret = __bus_eavesdropper(sk, condition);
+	bus_state_unlock(sk);
+
+	return ret;
+}
+
+/*
+ * Return true when @sk has an address with @prefix or is an eavesdropper;
+ * both checks are made under a single hold of the state lock of @sk.
+ */
+static inline bool bus_has_prefix_eavesdropper(struct sock *sk, u16 prefix)
+{
+	bool ret;
+
+	bus_state_lock(sk);
+	ret = __bus_has_prefix(sk, prefix) || __bus_eavesdropper(sk, 0);
+	bus_state_unlock(sk);
+
+	return ret;
+}
+
+/*
+ * Look up the entry on the address list of @sk whose bus address equals
+ * @sbus_addr. Returns NULL when there is none. Does no locking of its own.
+ */
+static inline struct bus_address *__bus_get_address(struct sock *sk,
+						    struct bus_addr *sbus_addr)
+{
+	struct bus_sock *u = bus_sk(sk);
+	struct bus_address *addr;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(addr, node, &u->addr_list, addr_node) {
+		if (addr->name->sbus_addr.s_addr != sbus_addr->s_addr)
+			continue;
+
+		return addr;
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked variant of __bus_get_address(): the lookup runs with the socket's
+ * state lock held. No reference is taken on the returned address here.
+ */
+static inline struct bus_address *bus_get_address(struct sock *sk,
+						  struct bus_addr *sbus_addr)
+{
+	struct bus_address *found;
+
+	bus_state_lock(sk);
+	found = __bus_get_address(sk, sbus_addr);
+	bus_state_unlock(sk);
+
+	return found;
+}
+
+/*
+ * Scan bus_socket_table[@hash] for a socket in namespace @net whose bound
+ * address has length @len and whose name bytes equal @sbusname.
+ * Takes no lock itself; see bus_find_socket_byname().
+ *
+ * Returns the socket without taking a reference, or NULL if not found.
+ */
+static struct sock *__bus_find_socket_byname(struct net *net,
+					     struct sockaddr_bus *sbusname,
+					     int len, unsigned int hash)
+{
+	struct hlist_node *pos;
+	struct sock *cand;
+
+	sk_for_each(cand, pos, &bus_socket_table[hash]) {
+		struct bus_sock *bsk = bus_sk(cand);
+
+		if (net_eq(sock_net(cand), net) &&
+		    bsk->addr->len == len &&
+		    !memcmp(bsk->addr->name, sbusname, len))
+			return cand;
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked variant of __bus_find_socket_byname(): looks the socket up under
+ * bus_table_lock and, when one is found, takes a reference on it before
+ * the lock is dropped. The caller owns that reference.
+ */
+static inline struct sock *bus_find_socket_byname(struct net *net,
+						  struct sockaddr_bus *sbusname,
+						  int len, unsigned int hash)
+{
+	struct sock *found;
+
+	spin_lock(&bus_table_lock);
+	found = __bus_find_socket_byname(net, sbusname, len, hash);
+	if (found != NULL)
+		sock_hold(found);
+	spin_unlock(&bus_table_lock);
+
+	return found;
+}
+
+/*
+ * Scan bus_address_table[@hash] for an address entry matching @sbusname
+ * and return the socket that owns it.
+ *
+ * An entry matches when its socket uses @protocol and lives in @net, and
+ * when its length, family and s_addr equal those of @sbusname and
+ * bus_same_bus() accepts the pair.
+ *
+ * NOTE: the incoming @len is not used as passed; it is recomputed below
+ * from the path length of @sbusname before any comparison.
+ *
+ * Takes no lock itself; see bus_find_socket_byaddress().
+ * Returns the owning socket without taking a reference, or NULL.
+ */
+static struct sock *__bus_find_socket_byaddress(struct net *net,
+						struct sockaddr_bus *sbusname,
+						int len, int protocol,
+						unsigned int hash)
+{
+	struct sock *s;
+	struct bus_address *addr;
+	struct hlist_node *node;
+	/* Skip a leading NUL byte in the path when measuring it. */
+	int offset = (sbusname->sbus_path[0] == '\0');
+	int path_len = strnlen(sbusname->sbus_path + offset, BUS_PATH_MAX);
+
+	len = path_len + 1 + sizeof(__kernel_sa_family_t) +
+	      sizeof(struct bus_addr);
+
+	hlist_for_each_entry(addr, node, &bus_address_table[hash],
+			     table_node) {
+		s = addr->sock;
+
+		if (s->sk_protocol != protocol)
+			continue;
+
+		if (!net_eq(sock_net(s), net))
+			continue;
+
+		if (addr->len == len &&
+		    addr->name->sbus_family == sbusname->sbus_family &&
+		    addr->name->sbus_addr.s_addr == sbusname->sbus_addr.s_addr
+		    && bus_same_bus(addr->name, sbusname))
+			return s;
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked variant of __bus_find_socket_byaddress(): the lookup runs under
+ * bus_address_lock and a reference is taken on the socket, if any, before
+ * the lock is released. The caller owns that reference.
+ */
+static inline struct sock *bus_find_socket_byaddress(struct net *net,
+						     struct sockaddr_bus *name,
+						     int len, int protocol,
+						     unsigned int hash)
+{
+	struct sock *found;
+
+	spin_lock(&bus_address_lock);
+	found = __bus_find_socket_byaddress(net, name, len, protocol, hash);
+	if (found != NULL)
+		sock_hold(found);
+	spin_unlock(&bus_address_lock);
+
+	return found;
+}
+
+/*
+ * A socket counts as writable while four times its allocated write memory
+ * does not exceed its send buffer size.
+ */
+static inline int bus_writable(struct sock *sk)
+{
+	int scaled_wmem = atomic_read(&sk->sk_wmem_alloc) << 2;
+
+	return scaled_wmem <= sk->sk_sndbuf;
+}
+
+/*
+ * Write-space notification for a bus socket.
+ *
+ * Does nothing unless bus_writable(sk) holds. Otherwise it wakes sleepers
+ * on the socket's own wait queue for POLLOUT | POLLWRNORM | POLLWRBAND and
+ * sends the async SOCK_WAKE_SPACE notification; then, if the socket belongs
+ * to a bus, it repeats both steps for every socket on the bus peers list
+ * while holding the bus lock.
+ */
+static void bus_write_space(struct sock *sk)
+{
+	struct bus_sock *u = bus_sk(sk);
+	struct bus_sock *p;
+	struct hlist_node *node;
+	struct socket_wq *wq;
+
+	if (bus_writable(sk)) {
+		/* sk->sk_wq is dereferenced under RCU protection */
+		rcu_read_lock();
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync_poll(&wq->wait,
+				POLLOUT | POLLWRNORM | POLLWRBAND);
+		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+		rcu_read_unlock();
+
+		if (u && u->bus) {
+			/* the peers list is walked under the bus lock */
+			spin_lock(&u->bus->lock);
+			hlist_for_each_entry(p, node, &u->bus->peers,
+					     bus_node) {
+				wake_up_interruptible_sync_poll(sk_sleep(&p->sk),
+								POLLOUT |
+								POLLWRNORM |
+								POLLWRBAND);
+				sk_wake_async(&p->sk, SOCK_WAKE_SPACE,
+					      POLL_OUT);
+			}
+			spin_unlock(&u->bus->lock);
+		}
+	}
+}
+
+/*
+ * kref release callback for struct bus: frees the bus that embeds @kref.
+ */
+static void bus_bus_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct bus, kref));
+}
+
+/*
+ * Socket destructor (assigned to sk->sk_destruct in bus_create1()).
+ *
+ * Purges the receive queue and warns about leftover write memory, a socket
+ * that is still hashed, or one still attached to a struct socket. If the
+ * socket is not flagged SOCK_DEAD it logs a message and bails out early,
+ * leaving the bus reference and the counters untouched. Otherwise it drops
+ * the socket's bus reference and decrements the global and per-protocol
+ * socket counters.
+ */
+static void bus_sock_destructor(struct sock *sk)
+{
+	struct bus_sock *u = bus_sk(sk);
+
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(!sk_unhashed(sk));
+	WARN_ON(sk->sk_socket);
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		pr_info("Attempt to release alive bus socket: %p\n", sk);
+		return;
+	}
+
+	/* drop this socket's reference on its bus, if it has one */
+	if (u->bus) {
+		kref_put(&u->bus->kref, bus_bus_release);
+		u->bus = NULL;
+	}
+
+	atomic_long_dec(&bus_nr_socks);
+	/* the in-use counter update is done with bottom halves disabled */
+	local_bh_disable();
+	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	local_bh_enable();
+#ifdef BUS_REFCNT_DEBUG
+	pr_debug("BUS %p is destroyed, %ld are still alive.\n", sk,
+		 atomic_long_read(&bus_nr_socks));
+#endif
+}
+
+/*
+ * Tear down a bus socket.
+ *
+ * @sk:      socket to release
+ * @embrion: non-zero when @sk is a not-yet-accepted socket being discarded
+ *           from a listener's queue (the recursive call below passes 1);
+ *           in that case the peer always gets ECONNRESET
+ *
+ * Steps, in order: unhash the socket; for an authenticated socket that is
+ * neither the master nor on the master side, unlink it from the bus peers
+ * list; under the state lock orphan the socket, mark it shut down and
+ * closed, and release its address(es); wake waiters on peer_wait; shut down
+ * and notify the peer, dropping our reference on it; flush the receive
+ * queue (recursively releasing queued sockets if this was a listener);
+ * drop the path reference; drop the socket reference; finally run the fd
+ * garbage collector if any bus fds are in flight.
+ *
+ * Always returns 0.
+ */
+static int bus_release_sock(struct sock *sk, int embrion)
+{
+	struct bus_sock *u = bus_sk(sk);
+	struct path path;
+	struct sock *skpair;
+	struct sk_buff *skb;
+	int state;
+	struct bus_address *addr;
+	struct hlist_node *node, *tmp;
+
+	bus_remove_socket(sk);
+
+	/* leave the bus peers list; also fix up the eavesdropper count */
+	if (u->bus && u->authenticated &&
+	    !u->bus_master && !u->bus_master_side) {
+		spin_lock(&u->bus->lock);
+		hlist_del(&u->bus_node);
+		if (u->eavesdropper)
+			atomic64_dec(&u->bus->eavesdropper_cnt);
+		spin_unlock(&u->bus->lock);
+	}
+
+	/* Clear state */
+	bus_state_lock(sk);
+	sock_orphan(sk);
+	sk->sk_shutdown = SHUTDOWN_MASK;
+	/* keep a local copy of the path so it can be put after unlocking */
+	path	     = u->path;
+	u->path.dentry = NULL;
+	u->path.mnt = NULL;
+	/* remember the previous state; it is consulted when flushing below */
+	state = sk->sk_state;
+	sk->sk_state = BUS_CLOSE;
+
+	if (u->bus_master)
+			u->bus->master = NULL;
+
+	if (u->bus_master_side) {
+		/* master-side socket: just drop the reference on u->addr */
+		bus_release_addr(u->addr);
+		u->addr = NULL;
+	} else {
+		u->addr = NULL;
+
+		/* unlink and release every address on this socket's list */
+		spin_lock(&bus_address_lock);
+		hlist_for_each_entry_safe(addr, node, tmp, &u->addr_list,
+					  addr_node) {
+			hlist_del(&addr->addr_node);
+			__bus_remove_address(addr);
+			bus_release_addr(addr);
+		}
+		spin_unlock(&bus_address_lock);
+	}
+
+	bus_state_unlock(sk);
+
+	wake_up_interruptible_all(&u->peer_wait);
+
+	skpair = bus_peer(sk);
+
+	if (skpair != NULL) {
+		bus_state_lock(skpair);
+		/* No more writes */
+		skpair->sk_shutdown = SHUTDOWN_MASK;
+		/* unread data or an embryonic socket means a reset for the peer */
+		if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
+			skpair->sk_err = ECONNRESET;
+		bus_state_unlock(skpair);
+		skpair->sk_state_change(skpair);
+		sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
+		sock_put(skpair); /* It may now die */
+		bus_peer(sk) = NULL;
+	}
+
+	/* Try to flush out this socket. Throw out buffers at least */
+
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		/* a listener's queue holds pending connections: release them */
+		if (state == BUS_LISTEN)
+			bus_release_sock(skb->sk, 1);
+		/* passed fds are erased in the kfree_skb hook	      */
+		kfree_skb(skb);
+	}
+
+	if (path.dentry)
+		path_put(&path);
+
+	sock_put(sk);
+
+	/* ---- Socket is dead now and most probably destroyed ---- */
+
+	if (bus_tot_inflight)
+		bus_gc();		/* Garbage collect fds */
+
+	return 0;
+}
+
+/*
+ * Set the socket's peer credentials to those of the current task: drops
+ * whatever pid/cred references the socket held before, then stores a
+ * reference to the current thread-group pid and the current credentials.
+ */
+static void init_peercred(struct sock *sk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid  = get_pid(task_tgid(current));
+	sk->sk_peer_cred = get_current_cred();
+}
+
+/*
+ * Copy the peer credentials recorded on @peersk onto @sk: drops the
+ * pid/cred references @sk held before, then takes new references on
+ * @peersk's sk_peer_pid and sk_peer_cred.
+ */
+static void copy_peercred(struct sock *sk, struct sock *peersk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
+	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+}
+
+/*
+ * listen() handler: put a bound bus master socket into BUS_LISTEN state.
+ *
+ * @sock:    socket to listen on
+ * @backlog: new value for sk_max_ack_backlog
+ *
+ * Fails with -EINVAL when the socket has no address, is not a bus master,
+ * or is in a state other than BUS_CLOSE/BUS_LISTEN. On success the backlog
+ * is updated, the socket's peer credentials are set to those of the calling
+ * task (so connect can copy them) and 0 is returned.
+ */
+static int bus_listen(struct socket *sock, int backlog)
+{
+	int err;
+	struct sock *sk = sock->sk;
+	struct bus_sock *u = bus_sk(sk);
+
+	err = -EINVAL;
+	if (!u->addr || !u->bus_master)
+		goto out;	/* Only a bound bus master socket may listen */
+	bus_state_lock(sk);
+	if (sk->sk_state != BUS_CLOSE && sk->sk_state != BUS_LISTEN)
+		goto out_unlock;
+	/* a larger backlog may let waiting connectors make progress */
+	if (backlog > sk->sk_max_ack_backlog)
+		wake_up_interruptible_all(&u->peer_wait);
+	sk->sk_max_ack_backlog	= backlog;
+	sk->sk_state		= BUS_LISTEN;
+	/* set credentials so connect can copy them */
+	init_peercred(sk);
+	err = 0;
+
+out_unlock:
+	bus_state_unlock(sk);
+out:
+	return err;
+}
+
+/*
+ * Forward declarations of the socket operation handlers, so that the
+ * bus_seqpacket_ops table can reference them before they are defined.
+ */
+static int bus_release(struct socket *);
+static int bus_bind(struct socket *, struct sockaddr *, int);
+static int bus_connect(struct socket *, struct sockaddr *,
+			       int addr_len, int flags);
+static int bus_accept(struct socket *, struct socket *, int);
+static int bus_getname(struct socket *, struct sockaddr *, int *, int);
+static unsigned int bus_poll(struct file *, struct socket *,
+				    poll_table *);
+static int bus_ioctl(struct socket *, unsigned int, unsigned long);
+static int bus_shutdown(struct socket *, int);
+static int bus_setsockopt(struct socket *, int, int, char __user *,
+			   unsigned int);
+static int bus_sendmsg(struct kiocb *, struct socket *,
+		       struct msghdr *, size_t);
+static int bus_recvmsg(struct kiocb *, struct socket *,
+		       struct msghdr *, size_t, int);
+
+/*
+ * set_peek_off handler: store @val as the socket's peek offset while
+ * holding the socket's read mutex.
+ */
+static void bus_set_peek_off(struct sock *sk, int val)
+{
+	struct mutex *read_mutex = &bus_sk(sk)->readlock;
+
+	mutex_lock(read_mutex);
+	sk->sk_peek_off = val;
+	mutex_unlock(read_mutex);
+}
+
+/*
+ * Socket operations for PF_BUS SOCK_SEQPACKET sockets; bus_create()
+ * installs this table as sock->ops. socketpair, getsockopt, mmap and
+ * sendpage are routed to the generic sock_no_*() stubs.
+ */
+static const struct proto_ops bus_seqpacket_ops = {
+	.family =	PF_BUS,
+	.owner =	THIS_MODULE,
+	.release =	bus_release,
+	.bind =		bus_bind,
+	.connect =	bus_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	bus_accept,
+	.getname =	bus_getname,
+	.poll =		bus_poll,
+	.ioctl =	bus_ioctl,
+	.listen =	bus_listen,
+	.shutdown =	bus_shutdown,
+	.setsockopt =	bus_setsockopt,
+	.getsockopt =	sock_no_getsockopt,
+	.sendmsg =	bus_sendmsg,
+	.recvmsg =	bus_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+	.set_peek_off =	bus_set_peek_off,
+};
+
+/*
+ * Protocol descriptor handed to sk_alloc() in bus_create1(); obj_size makes
+ * each allocated sock large enough to hold a struct bus_sock.
+ */
+static struct proto bus_proto = {
+	.name			= "BUS",
+	.owner			= THIS_MODULE,
+	.obj_size		= sizeof(struct bus_sock),
+};
+
+/*
+ * AF_BUS sockets do not interact with hardware, hence they
+ * don't trigger interrupts - so it's safe for them to have
+ * bh-unsafe locking for their sk_receive_queue.lock. Split off
+ * this special lock-class by reinitializing the spinlock key
+ * (applied to each new socket via lockdep_set_class() in bus_create1()):
+ */
+static struct lock_class_key af_bus_sk_receive_queue_lock_key;
+
+/*
+ * Allocate and initialise a bus sock.
+ *
+ * @net:  network namespace for the new sock
+ * @sock: struct socket to attach it to (bus_connect() passes NULL)
+ *
+ * Bumps the global socket counter first and refuses to allocate when it
+ * exceeds twice get_max_files(). On success the sock is initialised with
+ * the bus callbacks and default field values, hashed onto the
+ * bus_sockets_unbound list and counted in the per-protocol in-use
+ * statistics. On failure the global counter bump is undone.
+ *
+ * Returns the new sock, or NULL on failure.
+ */
+static struct sock *bus_create1(struct net *net, struct socket *sock)
+{
+	struct sock *sk = NULL;
+	struct bus_sock *u;
+
+	atomic_long_inc(&bus_nr_socks);
+	if (atomic_long_read(&bus_nr_socks) > 2 * get_max_files())
+		goto out;
+
+	sk = sk_alloc(net, PF_BUS, GFP_KERNEL, &bus_proto);
+	if (!sk)
+		goto out;
+
+	sock_init_data(sock, sk);
+	/* give the receive queue lock its own lockdep class */
+	lockdep_set_class(&sk->sk_receive_queue.lock,
+				&af_bus_sk_receive_queue_lock_key);
+
+	sk->sk_write_space	= bus_write_space;
+	sk->sk_max_ack_backlog	= BUS_MAX_QLEN;
+	sk->sk_destruct		= bus_sock_destructor;
+	u	  = bus_sk(sk);
+	u->path.dentry = NULL;
+	u->path.mnt = NULL;
+	u->bus = NULL;
+	u->bus_master = false;
+	u->authenticated = false;
+	u->eavesdropper = false;
+	spin_lock_init(&u->lock);
+	atomic_long_set(&u->inflight, 0);
+	INIT_LIST_HEAD(&u->link);
+	INIT_HLIST_HEAD(&u->addr_list);
+	INIT_HLIST_NODE(&u->bus_node);
+	mutex_init(&u->readlock); /* single task reading lock */
+	init_waitqueue_head(&u->peer_wait);
+	bus_insert_socket(bus_sockets_unbound, sk);
+out:
+	/* undo the counter bump on failure, else account the new socket */
+	if (sk == NULL)
+		atomic_long_dec(&bus_nr_socks);
+	else {
+		local_bh_disable();
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+		local_bh_enable();
+	}
+	return sk;
+}
+
+/*
+ * Create handler for PF_BUS sockets.
+ *
+ * Rejects protocols outside [BUS_PROTO_NONE, BUS_PROTO_DBUS] with
+ * -EPROTONOSUPPORT and socket types other than SOCK_SEQPACKET with
+ * -ESOCKTNOSUPPORT. For any protocol other than BUS_PROTO_NONE a module
+ * load is requested first; its result is not checked. Returns -ENOMEM
+ * when the sock cannot be allocated and 0 on success.
+ */
+static int bus_create(struct net *net, struct socket *sock, int protocol,
+		       int kern)
+{
+	struct sock *newsk;
+
+	if (protocol < BUS_PROTO_NONE || protocol > BUS_PROTO_DBUS)
+		return -EPROTONOSUPPORT;
+
+	if (protocol != BUS_PROTO_NONE)
+		request_module("net-pf-%d-proto-%d", PF_BUS, protocol);
+
+	sock->state = SS_UNCONNECTED;
+
+	switch (sock->type) {
+	case SOCK_SEQPACKET:
+		sock->ops = &bus_seqpacket_ops;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	newsk = bus_create1(net, sock);
+	if (newsk == NULL)
+		return -ENOMEM;
+
+	newsk->sk_protocol = protocol;
+
+	return 0;
+}
+
+/*
+ * release handler: detach the sock from @sock and tear it down through
+ * bus_release_sock(). A socket with no sock attached is a no-op.
+ */
+static int bus_release(struct socket *sock)
+{
+	struct sock *doomed = sock->sk;
+
+	if (doomed != NULL) {
+		sock->sk = NULL;
+		return bus_release_sock(doomed, 0);
+	}
+
+	return 0;
+}
+
+/*
+ * Resolve a bus address to the socket that owns it.
+ *
+ * For an address whose path starts with a non-NUL byte the path is first
+ * looked up in the filesystem; the caller needs write permission on the
+ * inode and the inode must be a socket. In both branches the socket itself
+ * is then found through bus_find_socket_byaddress(), which takes a
+ * reference on it.
+ *
+ * On success the referenced socket is returned and *@error is left
+ * untouched. On failure NULL is returned and *@error receives the errno
+ * (-ECONNREFUSED when no matching socket exists).
+ */
+static struct sock *bus_find_other(struct net *net,
+				   struct sockaddr_bus *sbusname, int len,
+				   int protocol, unsigned int hash, int *error)
+{
+	struct sock *u;
+	struct path path;
+	int err = 0;
+
+	if (sbusname->sbus_path[0]) {
+		struct inode *inode;
+		err = kern_path(sbusname->sbus_path, LOOKUP_FOLLOW, &path);
+		if (err)
+			goto fail;
+		inode = path.dentry->d_inode;
+		err = inode_permission(inode, MAY_WRITE);
+		if (err)
+			goto put_fail;
+
+		err = -ECONNREFUSED;
+		if (!S_ISSOCK(inode->i_mode))
+			goto put_fail;
+		u = bus_find_socket_byaddress(net, sbusname, len, protocol,
+					      hash);
+		if (!u)
+			goto put_fail;
+
+		touch_atime(&path);
+		path_put(&path);
+
+	} else {
+		err = -ECONNREFUSED;
+		u = bus_find_socket_byaddress(net, sbusname, len, protocol, hash);
+		if (u) {
+			struct dentry *dentry;
+			dentry = bus_sk(u)->path.dentry;
+			/* update atime only if the socket has a path attached */
+			if (dentry)
+				touch_atime(&bus_sk(u)->path);
+		} else
+			goto fail;
+	}
+
+	return u;
+
+put_fail:
+	path_put(&path);
+fail:
+	*error = err;
+	return NULL;
+}
+
+
+/*
+ * bind handler: bind the socket to a bus address and create a new bus with
+ * this socket as its master.
+ *
+ * @sock:     socket to bind
+ * @uaddr:    requested address; must have family AF_BUS. Its s_addr is
+ *            overwritten with BUS_MASTER_ADDR before use.
+ * @addr_len: length of @uaddr
+ *
+ * For an address whose path starts with a non-NUL byte a socket inode is
+ * created at that path; otherwise the name must not already be present in
+ * the socket table. On success the socket is rehashed, its address is
+ * published in the address table and it becomes the master of a freshly
+ * allocated bus.
+ *
+ * The bus is allocated before the filesystem is touched, so that an
+ * allocation failure cannot leave a socket inode behind and every error
+ * path only has to undo what it already holds.
+ *
+ * Returns 0 on success or a negative errno: -EINVAL (wrong family or
+ * already bound), -ENOMEM, -EADDRINUSE, or an error from bus_mkname() or
+ * from creating the filesystem node.
+ */
+static int bus_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct net *net = sock_net(sk);
+	struct bus_sock *u = bus_sk(sk);
+	struct sockaddr_bus *sbusaddr = (struct sockaddr_bus *)uaddr;
+	char *sbus_path = sbusaddr->sbus_path;
+	struct dentry *dentry = NULL;
+	struct path path;
+	int err;
+	unsigned int hash;
+	struct bus_address *addr;
+	struct hlist_head *list;
+	struct bus *bus;
+
+	err = -EINVAL;
+	if (sbusaddr->sbus_family != AF_BUS)
+		goto out;
+
+	/* If the address is available, the socket is the bus master */
+	sbusaddr->sbus_addr.s_addr = BUS_MASTER_ADDR;
+
+	err = bus_mkname(sbusaddr, addr_len, &hash);
+	if (err < 0)
+		goto out;
+	addr_len = err;
+
+	mutex_lock(&u->readlock);
+
+	err = -EINVAL;
+	if (u->addr)
+		goto out_up;
+
+	err = -ENOMEM;
+	addr = kzalloc(sizeof(*addr) + sizeof(struct sockaddr_bus), GFP_KERNEL);
+	if (!addr)
+		goto out_up;
+
+	memcpy(addr->name, sbusaddr, sizeof(struct sockaddr_bus));
+	addr->len = addr_len;
+	addr->hash = hash;
+	atomic_set(&addr->refcnt, 1);
+	addr->sock = sk;
+	INIT_HLIST_NODE(&addr->addr_node);
+	INIT_HLIST_NODE(&addr->table_node);
+
+	/*
+	 * Allocate the bus now, while nothing else needs unwinding and
+	 * bus_table_lock is not held.
+	 */
+	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
+	if (!bus) {
+		bus_release_addr(addr);
+		goto out_up;
+	}
+
+	if (sbus_path[0]) {
+		umode_t mode;
+		err = 0;
+		/*
+		 * Get the parent directory, calculate the hash for last
+		 * component.
+		 */
+		dentry = kern_path_create(AT_FDCWD, sbus_path, &path, 0);
+		err = PTR_ERR(dentry);
+		if (IS_ERR(dentry))
+			goto out_mknod_parent;
+
+		/*
+		 * All right, let's create it.
+		 */
+		mode = S_IFSOCK |
+		       (SOCK_INODE(sock)->i_mode & ~current_umask());
+		err = mnt_want_write(path.mnt);
+		if (err)
+			goto out_mknod_dput;
+		err = security_path_mknod(&path, dentry, mode, 0);
+		if (err)
+			goto out_mknod_drop_write;
+		err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
+out_mknod_drop_write:
+		mnt_drop_write(path.mnt);
+		if (err)
+			goto out_mknod_dput;
+		mutex_unlock(&path.dentry->d_inode->i_mutex);
+		dput(path.dentry);
+		path.dentry = dentry;
+	}
+
+	spin_lock(&bus_table_lock);
+
+	if (!sbus_path[0]) {
+		err = -EADDRINUSE;
+		if (__bus_find_socket_byname(net, sbusaddr, addr_len, hash)) {
+			bus_release_addr(addr);
+			kfree(bus);
+			goto out_unlock;
+		}
+
+		list = &bus_socket_table[addr->hash];
+	} else {
+		list = &bus_socket_table[dentry->d_inode->i_ino &
+					 (BUS_HASH_SIZE-1)];
+		u->path = path;
+	}
+
+	kref_init(&bus->kref);
+	bus->master = sk;
+	INIT_HLIST_HEAD(&bus->peers);
+	spin_lock_init(&bus->lock);
+	spin_lock_init(&bus->send_lock);
+	atomic64_set(&bus->addr_cnt, 0);
+	atomic64_set(&bus->eavesdropper_cnt, 0);
+
+	hlist_add_head(&addr->addr_node, &u->addr_list);
+
+	err = 0;
+	/* move the socket from its current hash list to the bound one */
+	__bus_remove_socket(sk);
+	u->addr = addr;
+	u->bus_master = true;
+	u->bus = bus;
+	__bus_insert_socket(list, sk);
+	bus_insert_address(&bus_address_table[addr->hash], addr);
+
+out_unlock:
+	spin_unlock(&bus_table_lock);
+out_up:
+	mutex_unlock(&u->readlock);
+out:
+	return err;
+
+out_mknod_dput:
+	dput(dentry);
+	mutex_unlock(&path.dentry->d_inode->i_mutex);
+	path_put(&path);
+out_mknod_parent:
+	if (err == -EEXIST)
+		err = -EADDRINUSE;
+	bus_release_addr(addr);
+	kfree(bus);
+	goto out_up;
+}
+
+/*
+ * Sleep until @other's receive queue may have room again.
+ *
+ * Releases @other's state lock (there is no matching lock in here, so the
+ * caller must hold it on entry). The task is queued on @other's peer_wait
+ * queue before the lock is dropped, and it only actually sleeps if @other
+ * is not dead, has not shut down receiving and its receive queue is still
+ * full.
+ *
+ * Returns the remaining timeout (unchanged when no sleep took place).
+ */
+static long bus_wait_for_peer(struct sock *other, long timeo)
+{
+	struct bus_sock *u = bus_sk(other);
+	int sched;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
+
+	/* decide whether to sleep while the state lock is still held */
+	sched = !sock_flag(other, SOCK_DEAD) &&
+		!(other->sk_shutdown & RCV_SHUTDOWN) &&
+		bus_recvq_full(other);
+
+	bus_state_unlock(other);
+
+	if (sched)
+		timeo = schedule_timeout(timeo);
+
+	finish_wait(&u->peer_wait, &wait);
+	return timeo;
+}
+
+/*
+ * connect handler: connect @sock to the master of the bus named by @uaddr.
+ *
+ * The s_addr of @uaddr is overwritten with BUS_MASTER_ADDR, so only the bus
+ * master can be the target. All resources (a bus_address for this socket,
+ * the new master-side sock and the skb that announces the connection) are
+ * allocated before any lock is taken. The listening socket is then looked
+ * up and locked, waiting (subject to the send timeout) while its receive
+ * queue is full, and finally this socket and the new sock are paired up and
+ * the skb is queued on the listener for bus_accept() to pick up.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int bus_connect(struct socket *sock, struct sockaddr *uaddr,
+			       int addr_len, int flags)
+{
+	struct sockaddr_bus *sbusaddr = (struct sockaddr_bus *)uaddr;
+	struct sock *sk = sock->sk;
+	struct net *net = sock_net(sk);
+	struct bus_sock *u = bus_sk(sk), *newu, *otheru;
+	struct sock *newsk = NULL;
+	struct sock *other = NULL;
+	struct sk_buff *skb = NULL;
+	struct bus_address *addr = NULL;
+	unsigned int hash;
+	int st;
+	int err;
+	long timeo;
+
+	/* Only connections to the bus master is allowed */
+	sbusaddr->sbus_addr.s_addr = BUS_MASTER_ADDR;
+
+	err = bus_mkname(sbusaddr, addr_len, &hash);
+	if (err < 0)
+		goto out;
+	addr_len = err;
+
+	err = -ENOMEM;
+	addr = kzalloc(sizeof(*addr) + sizeof(struct sockaddr_bus), GFP_KERNEL);
+	if (!addr)
+		goto out;
+
+	atomic_set(&addr->refcnt, 1);
+	INIT_HLIST_NODE(&addr->addr_node);
+	INIT_HLIST_NODE(&addr->table_node);
+
+	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+
+	/* First of all allocate resources.
+	   If we will make it after state is locked,
+	   we will have to recheck all again in any case.
+	 */
+
+	err = -ENOMEM;
+
+	/* create new sock for complete connection */
+	newsk = bus_create1(sock_net(sk), NULL);
+	if (newsk == NULL)
+		goto out;
+
+	/* Allocate skb for sending to listening sock */
+	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
+	if (skb == NULL)
+		goto out;
+
+restart:
+	/*  Find listening sock. */
+	other = bus_find_other(net, sbusaddr, addr_len, sk->sk_protocol, hash,
+			       &err);
+	if (!other)
+		goto out;
+
+	/* Latch state of peer */
+	bus_state_lock(other);
+
+	/* Apparently VFS overslept socket death. Retry. */
+	if (sock_flag(other, SOCK_DEAD)) {
+		bus_state_unlock(other);
+		sock_put(other);
+		goto restart;
+	}
+
+	err = -ECONNREFUSED;
+	if (other->sk_state != BUS_LISTEN)
+		goto out_unlock;
+	if (other->sk_shutdown & RCV_SHUTDOWN)
+		goto out_unlock;
+
+	if (bus_recvq_full(other)) {
+		err = -EAGAIN;
+		if (!timeo)
+			goto out_unlock;
+
+		/* bus_wait_for_peer() drops other's state lock for us */
+		timeo = bus_wait_for_peer(other, timeo);
+
+		err = sock_intr_errno(timeo);
+		if (signal_pending(current))
+			goto out;
+		sock_put(other);
+		goto restart;
+	}
+
+	/* Latch our state.
+
+	   It is tricky place. We need to grab our state lock and cannot
+	   drop lock on peer. It is dangerous because deadlock is
+	   possible. Connect to self case and simultaneous
+	   attempt to connect are eliminated by checking socket
+	   state. other is BUS_LISTEN, if sk is BUS_LISTEN we
+	   check this before attempt to grab lock.
+
+	   Well, and we have to recheck the state after socket locked.
+	 */
+	st = sk->sk_state;
+
+	switch (st) {
+	case BUS_CLOSE:
+		/* This is ok... continue with connect */
+		break;
+	case BUS_ESTABLISHED:
+		/* Socket is already connected */
+		err = -EISCONN;
+		goto out_unlock;
+	default:
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	bus_state_lock_nested(sk);
+
+	/* our state changed while we were unlocked: start over */
+	if (sk->sk_state != st) {
+		bus_state_unlock(sk);
+		bus_state_unlock(other);
+		sock_put(other);
+		goto restart;
+	}
+
+	err = security_bus_connect(sk, other, newsk);
+	if (err) {
+		bus_state_unlock(sk);
+		goto out_unlock;
+	}
+
+	/* The way is open! Fastly set all the necessary fields... */
+
+	sock_hold(sk);
+	bus_peer(newsk)	= sk;
+	newsk->sk_state		= BUS_ESTABLISHED;
+	newsk->sk_type		= sk->sk_type;
+	newsk->sk_protocol	= sk->sk_protocol;
+	init_peercred(newsk);
+	newu = bus_sk(newsk);
+	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
+	otheru = bus_sk(other);
+
+	/* copy address information from listening to new sock*/
+	if (otheru->addr && otheru->bus_master) {
+		/* the new master-side sock shares the listener's address */
+		atomic_inc(&otheru->addr->refcnt);
+		newu->addr = otheru->addr;
+		/*
+		 * The connecting socket gets its own copy of the address,
+		 * with s_addr taken from the bus address counter.
+		 */
+		memcpy(addr->name, otheru->addr->name,
+		       sizeof(struct sockaddr_bus));
+		addr->len = otheru->addr->len;
+		addr->name->sbus_addr.s_addr =
+			(atomic64_inc_return(&otheru->bus->addr_cnt) &
+			 BUS_CLIENT_MASK);
+		addr->hash = bus_compute_hash(addr->name->sbus_addr);
+		addr->sock = sk;
+		u->addr = addr;
+		/* both ends of the connection take a reference on the bus */
+		kref_get(&otheru->bus->kref);
+		u->bus = otheru->bus;
+		u->bus_master_side = false;
+		kref_get(&otheru->bus->kref);
+		newu->bus = otheru->bus;
+		newu->bus_master_side = true;
+		hlist_add_head(&addr->addr_node, &u->addr_list);
+
+		bus_insert_address(&bus_address_table[addr->hash], addr);
+	}
+	if (otheru->path.dentry) {
+		path_get(&otheru->path);
+		newu->path = otheru->path;
+	}
+
+	/* Set credentials */
+	copy_peercred(sk, other);
+	sk->sk_sndbuf = other->sk_sndbuf;
+	sk->sk_max_ack_backlog	= other->sk_max_ack_backlog;
+	newsk->sk_sndbuf = other->sk_sndbuf;
+
+	sock->state	= SS_CONNECTED;
+	sk->sk_state	= BUS_ESTABLISHED;
+	sock_hold(newsk);
+
+	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
+	bus_peer(sk)	= newsk;
+
+	bus_state_unlock(sk);
+
+	/* take ten and and send info to listening sock */
+	spin_lock(&other->sk_receive_queue.lock);
+	__skb_queue_tail(&other->sk_receive_queue, skb);
+	spin_unlock(&other->sk_receive_queue.lock);
+	bus_state_unlock(other);
+	other->sk_data_ready(other, 0);
+	sock_put(other);
+	return 0;
+
+out_unlock:
+	if (other)
+		bus_state_unlock(other);
+
+out:
+	/* release whatever was allocated or looked up before the failure */
+	kfree_skb(skb);
+	if (addr)
+		bus_release_addr(addr);
+	if (newsk)
+		bus_release_sock(newsk, 0);
+	if (other)
+		sock_put(other);
+	return err;
+}
+
+/*
+ * accept handler: dequeue one pending connection from a listening socket
+ * and graft the sock it carries onto @newsock.
+ *
+ * Returns 0 on success, -EINVAL when @sock is not in BUS_LISTEN state or
+ * the receive reported no error but yielded no skb, or the error reported
+ * by skb_recv_datagram().
+ */
+static int bus_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *listener = sock->sk;
+	struct sock *accepted;
+	struct sk_buff *request;
+	int err = -EINVAL;
+
+	if (listener->sk_state != BUS_LISTEN)
+		return err;
+
+	/* If socket state is BUS_LISTEN it cannot change (for now...),
+	 * so that no locks are necessary.
+	 */
+
+	request = skb_recv_datagram(listener, 0, flags&O_NONBLOCK, &err);
+	if (request == NULL) {
+		/* This means receive shutdown. */
+		return err == 0 ? -EINVAL : err;
+	}
+
+	accepted = request->sk;
+	skb_free_datagram(listener, request);
+	wake_up_interruptible(&bus_sk(listener)->peer_wait);
+
+	/* attach accepted sock to socket */
+	bus_state_lock(accepted);
+	newsock->state = SS_CONNECTED;
+	sock_graft(accepted, newsock);
+	bus_state_unlock(accepted);
+
+	return 0;
+}
+
+
+/*
+ * getname handler: copy the socket's own address, or its peer's when
+ * @peer is non-zero, into @uaddr and store the length in *@uaddr_len.
+ *
+ * A socket without an address reports family AF_BUS with an empty path and
+ * a length of sizeof(short). Returns 0 on success, or -ENOTCONN when the
+ * peer is requested but there is none.
+ */
+static int bus_getname(struct socket *sock, struct sockaddr *uaddr,
+		       int *uaddr_len, int peer)
+{
+	struct sock *target = sock->sk;
+	struct bus_address *bound;
+	DECLARE_SOCKADDR(struct sockaddr_bus *, sbusaddr, uaddr);
+
+	/* either branch leaves us holding a reference on target */
+	if (peer) {
+		target = bus_peer_get(target);
+		if (target == NULL)
+			return -ENOTCONN;
+	} else {
+		sock_hold(target);
+	}
+
+	bus_state_lock(target);
+	bound = bus_sk(target)->addr;
+	if (bound) {
+		*uaddr_len = sizeof(struct sockaddr_bus);
+		memcpy(sbusaddr, bound->name, *uaddr_len);
+	} else {
+		sbusaddr->sbus_family = AF_BUS;
+		sbusaddr->sbus_path[0] = 0;
+		*uaddr_len = sizeof(short);
+	}
+	bus_state_unlock(target);
+	sock_put(target);
+
+	return 0;
+}
+
+/*
+ * Move the fd list from the skb's control block into @scm and mark every
+ * file on it as no longer in flight, walking the list from last to first.
+ */
+static void bus_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	int idx;
+
+	scm->fp = BUSCB(skb).fp;
+	BUSCB(skb).fp = NULL;
+
+	idx = scm->fp->count - 1;
+	while (idx >= 0) {
+		bus_notinflight(scm->fp->fp[idx]);
+		idx--;
+	}
+}
+
+/*
+ * skb destructor (installed by bus_scm_to_skb()): rebuilds an scm_cookie
+ * from the pid, credentials and fd list stored in the skb's control block
+ * and destroys it, then releases the skb's write-memory charge when the
+ * skb is owned by a socket.
+ */
+static void bus_destruct_scm(struct sk_buff *skb)
+{
+	struct scm_cookie cookie;
+
+	memset(&cookie, 0, sizeof(cookie));
+	cookie.pid  = BUSCB(skb).pid;
+	cookie.cred = BUSCB(skb).cred;
+
+	if (BUSCB(skb).fp != NULL)
+		bus_detach_fds(&cookie, skb);
+	scm_destroy(&cookie);
+
+	if (skb->sk != NULL)
+		sock_wfree(skb);
+}
+
+#define MAX_RECURSION_LEVEL 4
+
+/*
+ * Attach the fds carried in @scm to @skb.
+ *
+ * Counts how many of the passed files are bus sockets and tracks the
+ * highest recursion_level among them. Returns -ETOOMANYREFS when that
+ * level exceeds MAX_RECURSION_LEVEL and -ENOMEM when the fd list cannot be
+ * duplicated; otherwise returns the highest level seen. When at least one
+ * bus socket is being passed, every file on the list is marked in flight.
+ */
+static int bus_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	unsigned char deepest = 0;
+	int nr_bus_socks = 0;
+	int idx;
+
+	for (idx = scm->fp->count - 1; idx >= 0; idx--) {
+		struct sock *passed = bus_get_socket(scm->fp->fp[idx]);
+
+		if (!passed)
+			continue;
+
+		nr_bus_socks++;
+		deepest = max(deepest, bus_sk(passed)->recursion_level);
+	}
+	if (unlikely(deepest > MAX_RECURSION_LEVEL))
+		return -ETOOMANYREFS;
+
+	/*
+	 * The skb needs its own file references: without them a socket
+	 * among the fds could become a garbage-collection candidate before
+	 * the skb has been queued.
+	 */
+	BUSCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!BUSCB(skb).fp)
+		return -ENOMEM;
+
+	if (!nr_bus_socks)
+		return deepest;
+
+	for (idx = scm->fp->count - 1; idx >= 0; idx--)
+		bus_inflight(scm->fp->fp[idx]);
+
+	return deepest;
+}
+
+/*
+ * Copy the control information in @scm into the skb's control block.
+ *
+ * Takes references on the pid and, if present, the credentials; attaches
+ * the fd list only when @send_fds is set and there is one; and installs
+ * bus_destruct_scm() as the skb destructor in every case.
+ *
+ * Returns 0 when no fds were attached, otherwise the result of
+ * bus_attach_fds().
+ */
+static int bus_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb,
+			  bool send_fds)
+{
+	int ret = 0;
+
+	BUSCB(skb).pid = get_pid(scm->pid);
+	if (scm->cred != NULL)
+		BUSCB(skb).cred = get_cred(scm->cred);
+
+	BUSCB(skb).fp = NULL;
+	if (send_fds && scm->fp != NULL)
+		ret = bus_attach_fds(scm, skb);
+
+	skb->destructor = bus_destruct_scm;
+
+	return ret;
+}
+
+/*
+ * Some applications depend on a plain write() carrying SCM_CREDENTIALS.
+ * Unless the skb already has credentials, stamp it with the current
+ * task's pid and credentials when the sending socket has SOCK_PASSCRED
+ * set, when the destination has no struct socket attached, or when the
+ * destination's socket has SOCK_PASSCRED set.
+ */
+static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
+			    const struct sock *other)
+{
+	if (BUSCB(skb).cred)
+		return;
+
+	/* neither side asked for credentials: nothing to add */
+	if (!test_bit(SOCK_PASSCRED, &sock->flags) &&
+	    other->sk_socket &&
+	    !test_bit(SOCK_PASSCRED, &other->sk_socket->flags))
+		return;
+
+	BUSCB(skb).pid  = get_pid(task_tgid(current));
+	BUSCB(skb).cred = get_current_cred();
+}
+
+/*
+ *	Send AF_BUS data.
+ */
+
+/*
+ * Queue @skb on the receive queue of the destination recorded in its send
+ * context: timestamps the skb if the destination asked for it, adds
+ * credentials where needed, appends the skb to the queue and finally
+ * raises the destination's recursion_level to the context's max_level if
+ * that is higher.
+ */
+static void bus_deliver_skb(struct sk_buff *skb)
+{
+	struct bus_send_context *ctx = BUSCB(skb).sendctx;
+
+	if (sock_flag(ctx->other, SOCK_RCVTSTAMP))
+		__net_timestamp(skb);
+
+	maybe_add_creds(skb, ctx->sender_socket, ctx->other);
+	skb_queue_tail(&ctx->other->sk_receive_queue, skb);
+
+	if (bus_sk(ctx->other)->recursion_level < ctx->max_level)
+		bus_sk(ctx->other)->recursion_level = ctx->max_level;
+}
+
+/**
+ * bus_sendmsg_finish - deliver an skb to a destination
+ * @skb: sk_buff to deliver
+ *
+ * Delivers a packet to a destination. The skb control buffer holds
+ * all the information about the destination in its sending
+ * context. If the sending is unicast, the skb is delivered
+ * and the receiver notified; if the sending is multicast, the
+ * skb is just marked as deliverable and the actual delivery is made
+ * outside this function with the bus->send_lock held, to ensure that
+ * the multicast sending is atomic.
+ *
+ * When the context has no destination socket yet, it is resolved from the
+ * recipient address; if that fails or the socket found is not
+ * authenticated, the destination falls back to the sender's peer or, for a
+ * master-side sender, to the sender itself.
+ *
+ * Returns the skb length on success, and also when the destination's
+ * socket filter rejects the packet (the skb is then dropped silently).
+ * Otherwise the skb is freed and a negative errno is returned.
+ */
+static int bus_sendmsg_finish(struct sk_buff *skb)
+{
+	int err;
+	struct bus_send_context *sendctx;
+	struct socket *sock;
+	struct sock *sk;
+	struct net *net;
+	size_t len = skb->len;
+
+	sendctx = BUSCB(skb).sendctx;
+	sock = sendctx->sender_socket;
+	sk = sock->sk;
+	net = sock_net(sk);
+
+restart:
+	if (!sendctx->other) {
+		err = -ECONNRESET;
+		if (sendctx->recipient == NULL)
+			goto out_free;
+
+		sendctx->other = bus_find_other(net, sendctx->recipient,
+						sendctx->namelen,
+						sk->sk_protocol,
+						sendctx->hash, &err);
+
+		if (sendctx->other == NULL ||
+		    !bus_sk(sendctx->other)->authenticated) {
+
+			/* drop the reference on an unauthenticated socket */
+			if (sendctx->other)
+				sock_put(sendctx->other);
+
+			if (!bus_sk(sk)->bus_master_side) {
+				err = -ENOTCONN;
+				sendctx->other = bus_peer_get(sk);
+				if (!sendctx->other)
+					goto out_free;
+			} else {
+				sendctx->other = sk;
+				sock_hold(sendctx->other);
+			}
+		}
+	}
+
+	if (sk_filter(sendctx->other, skb) < 0) {
+		/* Toss the packet but do not return any error to the sender */
+		err = len;
+		goto out_free;
+	}
+
+	bus_state_lock(sendctx->other);
+
+	if (sock_flag(sendctx->other, SOCK_DEAD)) {
+		/*
+		 *	Check with 1003.1g - what should
+		 *	datagram error
+		 */
+		bus_state_unlock(sendctx->other);
+		sock_put(sendctx->other);
+
+		err = 0;
+		bus_state_lock(sk);
+		/* the dead socket was our peer: forget it and report failure */
+		if (bus_peer(sk) == sendctx->other) {
+			bus_peer(sk) = NULL;
+			bus_state_unlock(sk);
+			sock_put(sendctx->other);
+			err = -ECONNREFUSED;
+		} else {
+			bus_state_unlock(sk);
+		}
+
+		sendctx->other = NULL;
+		if (err)
+			goto out_free;
+		goto restart;
+	}
+
+	err = -EPIPE;
+	if (sendctx->other->sk_shutdown & RCV_SHUTDOWN)
+		goto out_unlock;
+
+	if (bus_recvq_full(sendctx->other)) {
+		if (!sendctx->timeo) {
+			err = -EAGAIN;
+			goto out_unlock;
+		}
+
+		/* bus_wait_for_peer() drops the destination's state lock */
+		sendctx->timeo = bus_wait_for_peer(sendctx->other,
+						   sendctx->timeo);
+
+		err = sock_intr_errno(sendctx->timeo);
+		if (signal_pending(current))
+			goto out_free;
+
+		goto restart;
+	}
+
+	if (!sendctx->multicast && !sendctx->eavesdropper) {
+		bus_deliver_skb(skb);
+		bus_state_unlock(sendctx->other);
+		sendctx->other->sk_data_ready(sendctx->other, 0);
+		sock_put(sendctx->other);
+	} else {
+		/* only mark the skb; the destination reference is not dropped here */
+		sendctx->deliver = 1;
+		bus_state_unlock(sendctx->other);
+	}
+
+	return len;
+
+out_unlock:
+	bus_state_unlock(sendctx->other);
+out_free:
+	kfree_skb(skb);
+	if (sendctx->other)
+		sock_put(sendctx->other);
+
+	return err;
+}
+
+/**
+ * bus_sendmsg_mcast - do a multicast sending
+ * @skb: sk_buff to deliver
+ *
+ * Send a packet to a multicast destination.
+ * The function is also called for unicast sending when eavesdropping
+ * is enabled. Since the unicast destination and the eavesdroppers
+ * have to receive the packet atomically.
+ */
+static int bus_sendmsg_mcast(struct sk_buff *skb)
+{
+	struct bus_send_context *sendctx;
+	struct bus_send_context *tmpctx;
+	struct socket *sock;
+	struct sock *sk;
+	struct net *net;
+	struct bus_sock *u, *s;
+	struct hlist_node *node;
+	u16 prefix = 0;
+	struct sk_buff **skb_set = NULL;
+	struct bus_send_context **sendctx_set = NULL;
+	int  rcp_cnt, send_cnt;
+	int i;
+	int err;
+	int len = skb->len;
+	bool (*is_receiver) (struct sock *, u16);
+	bool main_rcp_found = false;
+
+	sendctx = BUSCB(skb).sendctx;
+	sendctx->deliver = 0;
+	sock = sendctx->sender_socket;
+	sk = sock->sk;
+	u = bus_sk(sk);
+	net = sock_net(sk);
+
+	if (sendctx->multicast) {
+		prefix = bus_addr_prefix(sendctx->recipient);
+		if (sendctx->eavesdropper)
+			is_receiver = &bus_has_prefix_eavesdropper;
+		else
+			is_receiver = &bus_has_prefix;
+	} else {
+		is_receiver = &bus_eavesdropper;
+
+		/*
+		 * If the destination is not the peer accepted socket
+		 * we have to get the correct destination.
+		 */
+		if (!sendctx->to_master && sendctx->recipient) {
+			sendctx->other = bus_find_other(net, sendctx->recipient,
+							sendctx->namelen,
+							sk->sk_protocol,
+							sendctx->hash, &err);
+
+
+			if (sendctx->other == NULL ||
+			    !bus_sk(sendctx->other)->authenticated) {
+
+				if (sendctx->other)
+					sock_put(sendctx->other);
+
+				if (sendctx->other == NULL) {
+					if (!bus_sk(sk)->bus_master_side) {
+						err = -ENOTCONN;
+						sendctx->other = bus_peer_get(sk);
+						if (!sendctx->other)
+							goto out;
+					} else {
+						sendctx->other = sk;
+						sock_hold(sendctx->other);
+					}
+				}
+				sendctx->to_master = 1;
+			}
+		}
+	}
+
+
+try_again:
+	rcp_cnt = 0;
+	main_rcp_found = false;
+
+	spin_lock(&u->bus->lock);
+
+	hlist_for_each_entry(s, node, &u->bus->peers, bus_node) {
+
+		if (!net_eq(sock_net(&s->sk), net))
+			continue;
+
+		if (is_receiver(&s->sk, prefix) ||
+		    (!sendctx->multicast &&
+		     !sendctx->to_master &&
+		     &s->sk == sendctx->other))
+			rcp_cnt++;
+	}
+
+	spin_unlock(&u->bus->lock);
+
+	/*
+	 * Memory can't be allocated while holding a spinlock so
+	 * we have to release the lock, do the allocation for the
+	 * array to store each destination peer sk_buff and grab
+	 * the bus peer lock again. Peers could have joined the
+	 * bus while we relesed the lock so we allocate 5 more
+	 * recipients hoping that this will be enough to not having
+	 * to try again in case only a few peers joined the bus.
+	 */
+	rcp_cnt += 5;
+	skb_set = kzalloc(sizeof(struct sk_buff *) * rcp_cnt, GFP_KERNEL);
+
+	if (!skb_set) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	sendctx_set = kzalloc(sizeof(struct bus_send_context *) * rcp_cnt,
+			      GFP_KERNEL);
+	if (!sendctx_set) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < rcp_cnt; i++) {
+		skb_set[i] = skb_clone(skb, GFP_KERNEL);
+		if (!skb_set[i]) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+		sendctx_set[i] = BUSCB(skb_set[i]).sendctx
+			= kmalloc(sizeof(*sendctx) * rcp_cnt, GFP_KERNEL);
+		if (!sendctx_set[i]) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+		memcpy(sendctx_set[i], sendctx, sizeof(*sendctx));
+		err = bus_scm_to_skb(sendctx_set[i]->siocb->scm,
+				     skb_set[i], true);
+		if (err < 0)
+			goto out_free;
+		bus_get_secdata(sendctx_set[i]->siocb->scm,
+				skb_set[i]);
+
+		sendctx_set[i]->other = NULL;
+	}
+
+	send_cnt = 0;
+
+	spin_lock(&u->bus->lock);
+
+	hlist_for_each_entry(s, node, &u->bus->peers, bus_node) {
+
+		if (!net_eq(sock_net(&s->sk), net))
+			continue;
+
+		if (send_cnt >= rcp_cnt) {
+			spin_unlock(&u->bus->lock);
+
+			for (i = 0; i < rcp_cnt; i++) {
+				sock_put(sendctx_set[i]->other);
+				kfree_skb(skb_set[i]);
+				kfree(sendctx_set[i]);
+			}
+			kfree(skb_set);
+			kfree(sendctx_set);
+			sendctx_set = NULL;
+			skb_set = NULL;
+			goto try_again;
+		}
+
+		if (is_receiver(&s->sk, prefix) ||
+		    (!sendctx->multicast &&
+		     !sendctx->to_master &&
+		     &s->sk == sendctx->other)) {
+			skb_set_owner_w(skb_set[send_cnt], &s->sk);
+			tmpctx = BUSCB(skb_set[send_cnt]).sendctx;
+			sock_hold(&s->sk);
+			if (&s->sk == sendctx->other) {
+				tmpctx->main_recipient = 1;
+				main_rcp_found = true;
+			}
+			tmpctx->other = &s->sk;
+			tmpctx->recipient = s->addr->name;
+			tmpctx->eavesdropper = bus_eavesdropper(&s->sk, 0);
+
+			send_cnt++;
+		}
+	}
+
+	spin_unlock(&u->bus->lock);
+
+	/*
+	 * Peers have left the bus so we have to free
+	 * their pre-allocated bus_send_context and
+	 * socket buffers.
+	 */
+	if (send_cnt < rcp_cnt) {
+		for (i = send_cnt; i < rcp_cnt; i++) {
+			kfree_skb(skb_set[i]);
+			kfree(sendctx_set[i]);
+		}
+		rcp_cnt = send_cnt;
+	}
+
+	for (i = 0; i < send_cnt; i++) {
+		tmpctx = BUSCB(skb_set[i]).sendctx;
+		tmpctx->deliver = 0;
+		err = NF_HOOK(NFPROTO_BUS, NF_BUS_SENDING, skb_set[i],
+			      NULL, NULL, bus_sendmsg_finish);
+		if (err == -EPERM)
+			sock_put(tmpctx->other);
+	}
+
+	/*
+	 * If the send context is not multicast, the destination
+	 * coud be either the peer accepted socket descriptor or
+	 * a peer that is not an eavesdropper. If the peer is not
+	 * the accepted socket descriptor and has been authenticated,
+	 * it is a member of the bus peer list so it has already been
+	 * marked for delivery.
+	 * But if the destination is the accepted socket descriptor
+	 * or is a non-authenticated peer it is not a member of the
+	 * bus peer list so the packet has to be explicitly deliver
+	 * to it.
+	 */
+
+	if (!sendctx->multicast &&
+	    (sendctx->to_master ||
+	     (sendctx->bus_master_side && !main_rcp_found))) {
+		sendctx->main_recipient = 1;
+		err = NF_HOOK(NFPROTO_BUS, NF_BUS_SENDING, skb, NULL, NULL,
+			bus_sendmsg_finish);
+		if (err == -EPERM)
+			sock_put(sendctx->other);
+	}
+
+	spin_lock(&u->bus->send_lock);
+
+	for (i = 0; i < send_cnt; i++) {
+		tmpctx = sendctx_set[i];
+		if (tmpctx->deliver != 1)
+			continue;
+
+		bus_state_lock(tmpctx->other);
+		bus_deliver_skb(skb_set[i]);
+		bus_state_unlock(tmpctx->other);
+	}
+
+	if (!sendctx->multicast &&
+	    sendctx->deliver == 1 &&
+	    !bus_sk(sendctx->other)->eavesdropper) {
+		bus_state_lock(sendctx->other);
+		bus_deliver_skb(skb);
+		bus_state_unlock(sendctx->other);
+	}
+
+	spin_unlock(&u->bus->send_lock);
+
+	for (i = 0; i < send_cnt; i++) {
+		tmpctx = sendctx_set[i];
+		if (tmpctx->deliver != 1)
+			continue;
+
+		tmpctx->other->sk_data_ready(tmpctx->other, 0);
+		sock_put(tmpctx->other);
+	}
+
+	if (!sendctx->multicast &&
+	    sendctx->deliver == 1 &&
+	    !bus_sk(sendctx->other)->eavesdropper) {
+		sendctx->other->sk_data_ready(sendctx->other, 0);
+		sock_put(sendctx->other);
+	}
+
+	err = len;
+	goto out;
+
+out_free:
+	for (i = 0; i < rcp_cnt; i++) {
+		if (skb_set[i])
+			kfree_skb(skb_set[i]);
+	}
+
+out:
+	kfree(skb_set);
+	if (sendctx_set) {
+		for (i = 0; i < rcp_cnt; i++)
+			kfree(sendctx_set[i]);
+		kfree(sendctx_set);
+	}
+
+	if (sendctx->deliver == 0) {
+		if (!sendctx->to_master &&
+		    !(sendctx->bus_master_side && !main_rcp_found))
+			kfree_skb(skb);
+		if (!sendctx->to_master &&
+		    !(sendctx->bus_master_side && !main_rcp_found))
+			if (sendctx->other)
+				sock_put(sendctx->other);
+	}
+	scm_destroy(sendctx->siocb->scm);
+
+	return err;
+}
+
+/**
+ * bus_sendmsg - send an skb to a destination
+ * @kiocb: I/O control block info
+ * @sock: sender socket
+ * @msg: message header
+ * @len: message length
+ *
+ * Send a socket buffer to a destination. The destination can be
+ * either a unicast or a multicast address. In any case, a copy of
+ * the packet has to be sent to all the sockets that are allowed to
+ * eavesdrop on the communication bus.
+ *
+ * If the destination address is not associated with any socket, the
+ * packet is default routed to the bus master (the sender accepted
+ * socket).
+ *
+ * The af_bus sending path is hooked to the netfilter subsystem so
+ * netfilter hooks can filter or modify the packet before delivery.
+ *
+ * Return: @len for a multicast send; the result of bus_sendmsg_mcast()
+ * for a unicast send on a bus that has eavesdroppers; the result of the
+ * NF_BUS_SENDING hook otherwise. A negative errno is returned when the
+ * socket has a pending error, is not in the BUS_ESTABLISHED state, the
+ * address is rejected, MSG_OOB is requested, the message is too large
+ * for the send buffer, or waiting/allocating/copying fails.
+ */
+static int bus_sendmsg(struct kiocb *kiocb, struct socket *sock,
+				struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct bus_sock *u = bus_sk(sk);
+	struct sockaddr_bus *sbusaddr = msg->msg_name;
+	int err;
+	struct sk_buff *skb;
+	struct scm_cookie tmp_scm;
+	bool to_master = false;
+	bool multicast = false;
+	struct bus_send_context sendctx;
+
+	err = sock_error(sk);
+	if (err)
+		return err;
+
+	if (sk->sk_state != BUS_ESTABLISHED)
+		return -ENOTCONN;
+
+	/* A zero-length name means no destination address was supplied. */
+	if (!msg->msg_namelen)
+		sbusaddr = NULL;
+
+	if (sbusaddr && !bus_same_bus(sbusaddr, u->addr->name))
+		return -EHOSTUNREACH;
+
+	/*
+	 * Unaddressed sends from a socket that is not on the master side,
+	 * and sends explicitly addressed to BUS_MASTER_ADDR, go to the bus
+	 * master. Any other addressed send is refused for a sender that is
+	 * neither on the master side nor authenticated.
+	 */
+	if ((!sbusaddr && !u->bus_master_side) ||
+	    (sbusaddr && sbusaddr->sbus_addr.s_addr == BUS_MASTER_ADDR))
+		to_master = true;
+	else if (sbusaddr && !u->bus_master_side && !u->authenticated)
+		return -EHOSTUNREACH;
+
+	sendctx.namelen = 0; /* fake GCC */
+	sendctx.siocb = kiocb_to_siocb(kiocb);
+	sendctx.other = NULL;
+
+	/* Fall back to an on-stack scm cookie when the caller gave none. */
+	if (NULL == sendctx.siocb->scm)
+		sendctx.siocb->scm = &tmp_scm;
+	wait_for_bus_gc();
+	err = scm_send(sock, msg, sendctx.siocb->scm);
+	if (err < 0)
+		return err;
+
+	err = -EOPNOTSUPP;
+	if (msg->msg_flags&MSG_OOB)
+		goto out;
+
+	if (sbusaddr && !to_master) {
+		/* Addressed send: validate the name and compute its hash. */
+		err = bus_mkname(sbusaddr, msg->msg_namelen, &sendctx.hash);
+		if (err < 0)
+			goto out;
+		sendctx.namelen = err;
+		multicast = bus_mc_addr(sbusaddr);
+	} else {
+		/*
+		 * No usable destination address: target the connected peer.
+		 * The reference taken here is dropped at the out label.
+		 */
+		err = -ENOTCONN;
+		sendctx.other = bus_peer_get(sk);
+		if (!sendctx.other)
+			goto out;
+	}
+
+	err = -EMSGSIZE;
+	if (len > sk->sk_sndbuf - 32)
+		goto out;
+
+	sendctx.timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+restart:
+	bus_state_lock(sk);
+	if (bus_recvq_full(sk)) {
+		err = -EAGAIN;
+		if (!sendctx.timeo) {
+			bus_state_unlock(sk);
+			goto out;
+		}
+
+		/*
+		 * NOTE(review): no unlock follows this call, so
+		 * bus_wait_for_peer() is presumably expected to release the
+		 * state lock taken above - confirm against its definition.
+		 */
+		sendctx.timeo = bus_wait_for_peer(sk, sendctx.timeo);
+
+		err = sock_intr_errno(sendctx.timeo);
+		if (signal_pending(current))
+			goto out;
+
+		goto restart;
+	} else {
+		bus_state_unlock(sk);
+	}
+
+	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		goto out;
+
+	err = bus_scm_to_skb(sendctx.siocb->scm, skb, true);
+	if (err < 0)
+		goto out_free;
+	sendctx.max_level = err + 1;
+	bus_get_secdata(sendctx.siocb->scm, skb);
+
+	/* Copy the user payload into the freshly allocated skb. */
+	skb_reset_transport_header(skb);
+	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+	if (err)
+		goto out_free;
+
+	sendctx.sender_socket = sock;
+	if (u->bus_master_side && sendctx.other) {
+		/* if the bus master sent an unicast message to a peer, we
+		 * need the address of that peer
+		 */
+		sendctx.sender = bus_sk(sendctx.other)->addr->name;
+	} else {
+		sendctx.sender = u->addr->name;
+	}
+	sendctx.recipient = sbusaddr;
+	sendctx.authenticated = u->authenticated;
+	sendctx.bus_master_side = u->bus_master_side;
+	sendctx.to_master = to_master;
+	sendctx.multicast = multicast;
+	sendctx.eavesdropper = atomic64_read(&u->bus->eavesdropper_cnt) ? 1 : 0;
+	/* The on-stack send context travels with the skb via its cb area. */
+	BUSCB(skb).sendctx = &sendctx;
+
+	if (sendctx.multicast || sendctx.eavesdropper) {
+		/*
+		 * More than one receiver is possible. No cleanup is done
+		 * here after the call; NOTE(review): bus_sendmsg_mcast() is
+		 * expected to release the skb, the peer reference and the
+		 * scm cookie on its own - confirm.
+		 */
+		sendctx.main_recipient = 0;
+		err = bus_sendmsg_mcast(skb);
+		return sendctx.multicast ? len : err;
+	} else {
+		sendctx.main_recipient = 1;
+		len = NF_HOOK(NFPROTO_BUS, NF_BUS_SENDING, skb, NULL, NULL,
+			      bus_sendmsg_finish);
+
+		/*
+		 * On -EPERM only the peer reference and the scm cookie are
+		 * released here (out label); the skb is not freed by this
+		 * function on that path.
+		 */
+		if (len == -EPERM) {
+			err = len;
+			goto out;
+		} else {
+			scm_destroy(sendctx.siocb->scm);
+			return len;
+		}
+	}
+
+out_free:
+	kfree_skb(skb);
+out:
+	if (sendctx.other)
+		sock_put(sendctx.other);
+	scm_destroy(sendctx.siocb->scm);
+	return err;
+}
+
+/*
+ * Report the address bound to @sk through @msg: msg_namelen receives the
+ * address length and msg_name a full struct sockaddr_bus worth of bytes.
+ * A socket without an address reports a zero-length name and leaves
+ * msg_name untouched.
+ */
+static void bus_copy_addr(struct msghdr *msg, struct sock *sk)
+{
+	struct bus_sock *bsk = bus_sk(sk);
+
+	if (!bsk->addr) {
+		msg->msg_namelen = 0;
+		return;
+	}
+
+	msg->msg_namelen = bsk->addr->len;
+	memcpy(msg->msg_name, bsk->addr->name, sizeof(struct sockaddr_bus));
+}
+
+/*
+ * bus_recvmsg - receive one datagram from the socket's receive queue
+ *
+ * Only works on a socket in the BUS_ESTABLISHED state and rejects MSG_OOB.
+ * The whole operation runs under u->readlock. On success the return value
+ * is the number of bytes copied to the caller, or the remaining datagram
+ * length (skb->len minus the peek offset) when MSG_TRUNC was passed in
+ * @flags. A negative errno is returned on failure; 0 is returned when the
+ * queue is empty and the receive side has been shut down.
+ */
+static int bus_recvmsg(struct kiocb *iocb, struct socket *sock,
+			  struct msghdr *msg, size_t size, int flags)
+{
+	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
+	struct scm_cookie tmp_scm;
+	struct sock *sk = sock->sk;
+	struct bus_sock *u = bus_sk(sk);
+	int noblock = flags & MSG_DONTWAIT;
+	struct sk_buff *skb;
+	int err;
+	int peeked, skip;
+
+	if (sk->sk_state != BUS_ESTABLISHED)
+		return -ENOTCONN;
+
+	err = -EOPNOTSUPP;
+	if (flags&MSG_OOB)
+		goto out;
+
+	msg->msg_namelen = 0;
+
+	/* Interrupted while waiting for the read lock: report it as such. */
+	err = mutex_lock_interruptible(&u->readlock);
+	if (err) {
+		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
+		goto out;
+	}
+
+	/* Offset into the datagram left behind by earlier MSG_PEEK reads. */
+	skip = sk_peek_offset(sk, flags);
+
+	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
+	if (!skb) {
+		bus_state_lock(sk);
+		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
+		if (err == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN))
+			err = 0;
+		bus_state_unlock(sk);
+		goto out_unlock;
+	}
+
+	/* Wake up anyone sleeping on peer_wait for write readiness. */
+	wake_up_interruptible_sync_poll(&u->peer_wait,
+					POLLOUT | POLLWRNORM | POLLWRBAND);
+
+	/* The reported source address is that of the skb's owning socket. */
+	if (msg->msg_name)
+		bus_copy_addr(msg, skb->sk);
+
+	/* Clamp to what is left of the datagram, or flag the truncation. */
+	if (size > skb->len - skip)
+		size = skb->len - skip;
+	else if (size < skb->len - skip)
+		msg->msg_flags |= MSG_TRUNC;
+
+	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
+	if (err)
+		goto out_free;
+
+	if (sock_flag(sk, SOCK_RCVTSTAMP))
+		__sock_recv_timestamp(msg, sk, skb);
+
+	/* Use a zeroed on-stack scm cookie when the caller supplied none. */
+	if (!siocb->scm) {
+		siocb->scm = &tmp_scm;
+		memset(&tmp_scm, 0, sizeof(tmp_scm));
+	}
+	scm_set_cred(siocb->scm, BUSCB(skb).pid, BUSCB(skb).cred);
+	bus_set_secdata(siocb->scm, skb);
+
+	if (!(flags & MSG_PEEK)) {
+		/* Real read: passed fds move from the skb to the cookie. */
+		if (BUSCB(skb).fp)
+			bus_detach_fds(siocb->scm, skb);
+
+		sk_peek_offset_bwd(sk, skb->len);
+	} else {
+		/* It is questionable: on PEEK we could:
+		   - do not return fds - good, but too simple 8)
+		   - return fds, and do not return them on read (old strategy,
+		     apparently wrong)
+		   - clone fds (I chose it for now, it is the most universal
+		     solution)
+
+		   POSIX 1003.1g does not actually define this clearly
+		   at all. POSIX 1003.1g doesn't define a lot of things
+		   clearly however!
+
+		*/
+
+		sk_peek_offset_fwd(sk, size);
+
+		if (BUSCB(skb).fp)
+			siocb->scm->fp = scm_fp_dup(BUSCB(skb).fp);
+	}
+	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
+
+	scm_recv(sock, msg, siocb->scm, flags);
+
+	/* Reached on success as well: the skb is always released here. */
+out_free:
+	skb_free_datagram(sk, skb);
+out_unlock:
+	mutex_unlock(&u->readlock);
+out:
+	return err;
+}
+
+/*
+ * Shut down one or both directions of @sock and mirror the change on the
+ * connected peer: what this side stops receiving the peer stops sending,
+ * and vice versa. Always returns 0.
+ */
+static int bus_shutdown(struct socket *sock, int mode)
+{
+	struct sock *sk = sock->sk;
+	struct sock *peer;
+	int peer_mode = 0;
+
+	/* Turn the caller's value into RCV_SHUTDOWN/SEND_SHUTDOWN bits. */
+	mode = (mode + 1) & (RCV_SHUTDOWN | SEND_SHUTDOWN);
+	if (mode == 0)
+		return 0;
+
+	bus_state_lock(sk);
+	sk->sk_shutdown |= mode;
+	peer = bus_peer(sk);
+	if (peer)
+		sock_hold(peer);
+	bus_state_unlock(sk);
+	sk->sk_state_change(sk);
+
+	if (!peer)
+		return 0;
+
+	/* The peer sees the opposite direction of each closed half. */
+	if (mode & RCV_SHUTDOWN)
+		peer_mode |= SEND_SHUTDOWN;
+	if (mode & SEND_SHUTDOWN)
+		peer_mode |= RCV_SHUTDOWN;
+
+	bus_state_lock(peer);
+	peer->sk_shutdown |= peer_mode;
+	bus_state_unlock(peer);
+	peer->sk_state_change(peer);
+
+	if (peer_mode == SHUTDOWN_MASK)
+		sk_wake_async(peer, SOCK_WAKE_WAITD, POLL_HUP);
+	else if (peer_mode & RCV_SHUTDOWN)
+		sk_wake_async(peer, SOCK_WAKE_WAITD, POLL_IN);
+
+	sock_put(peer);
+	return 0;
+}
+
+/*
+ * bus_add_addr - attach an additional bus address to a socket
+ * @sk: socket receiving the address; may be NULL. A non-NULL @sk must
+ *      carry a reference owned by the caller, which is dropped here on
+ *      every path.
+ * @sbus_addr: address to add
+ *
+ * The new address is a copy of the socket's current name with only the
+ * bus address part replaced by @sbus_addr. It is linked on the socket's
+ * address list and inserted into the global address table.
+ *
+ * Return: 0 on success, -ENOTCONN when @sk is NULL, -ENOMEM when the
+ * allocation fails, -EADDRINUSE when the address is already in use.
+ */
+static int bus_add_addr(struct sock *sk, struct bus_addr *sbus_addr)
+{
+	struct bus_address *addr;
+	struct sock *other;
+	struct bus_sock *u;
+	struct net *net;
+	int ret = 0;
+
+	/*
+	 * The caller hands over the result of bus_peer_get(), which is NULL
+	 * once the peer is gone; there is no reference to drop in that case.
+	 */
+	if (!sk)
+		return -ENOTCONN;
+
+	u = bus_sk(sk);
+	net = sock_net(sk);
+
+	addr = kzalloc(sizeof(*addr) + sizeof(struct sockaddr_bus), GFP_KERNEL);
+	if (!addr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(addr->name, u->addr->name, sizeof(struct sockaddr_bus));
+	addr->len = u->addr->len;
+
+	addr->name->sbus_addr.s_addr = sbus_addr->s_addr;
+	addr->hash = bus_compute_hash(addr->name->sbus_addr);
+	other = bus_find_socket_byaddress(net, addr->name, addr->len,
+					  sk->sk_protocol, addr->hash);
+
+	if (other) {
+		sock_put(other);
+		kfree(addr);
+		ret = -EADDRINUSE;
+		goto out;
+	}
+
+	atomic_set(&addr->refcnt, 1);
+	INIT_HLIST_NODE(&addr->addr_node);
+	INIT_HLIST_NODE(&addr->table_node);
+
+	addr->sock = sk;
+
+	/*
+	 * bus_del_addr() looks up and unlinks entries of this list under
+	 * the socket state lock, so the insertion takes the same lock.
+	 */
+	bus_state_lock(sk);
+	hlist_add_head(&addr->addr_node, &u->addr_list);
+	bus_state_unlock(sk);
+
+	bus_insert_address(&bus_address_table[addr->hash], addr);
+
+out:
+	sock_put(sk);
+
+	return ret;
+}
+
+/*
+ * bus_del_addr - detach a bus address from a socket
+ * @sk: socket owning the address; may be NULL. A non-NULL @sk must carry
+ *      a reference owned by the caller, which is dropped here on every
+ *      path.
+ * @sbus_addr: address to remove
+ *
+ * Return: 0 on success, -ENOTCONN when @sk is NULL, -EINVAL when the
+ * socket does not own @sbus_addr.
+ */
+static int bus_del_addr(struct sock *sk, struct bus_addr *sbus_addr)
+{
+	struct bus_address *addr;
+	int ret = 0;
+
+	/*
+	 * The caller hands over the result of bus_peer_get(), which is NULL
+	 * once the peer is gone; there is no reference to drop in that case.
+	 */
+	if (!sk)
+		return -ENOTCONN;
+
+	/* Look up and unlink from the socket's list under its state lock. */
+	bus_state_lock(sk);
+	addr = __bus_get_address(sk, sbus_addr);
+	if (!addr) {
+		ret = -EINVAL;
+		bus_state_unlock(sk);
+		goto out;
+	}
+	hlist_del(&addr->addr_node);
+	bus_state_unlock(sk);
+
+	bus_remove_address(addr);
+	bus_release_addr(addr);
+out:
+	sock_put(sk);
+
+	return ret;
+}
+
+/*
+ * Let the peer connected to @sk join the bus: mark it authenticated and
+ * link it on the bus peer list.
+ *
+ * Returns 0 on success, -EINVAL when @sk is not on the master side or the
+ * peer is already authenticated, and -ENOTCONN when there is no peer, @sk
+ * is not established, or the peer has been (partially) shut down.
+ */
+static int bus_join_bus(struct sock *sk)
+{
+	struct bus_sock *master = bus_sk(sk);
+	struct bus_sock *joiner;
+	struct sock *peer_sk;
+	int ret;
+
+	peer_sk = bus_peer_get(sk);
+	if (!peer_sk)
+		return -ENOTCONN;
+	joiner = bus_sk(peer_sk);
+
+	if (!master->bus_master_side || joiner->authenticated) {
+		ret = -EINVAL;
+	} else if (sk->sk_state != BUS_ESTABLISHED ||
+		   peer_sk->sk_shutdown != 0) {
+		ret = -ENOTCONN;
+	} else {
+		bus_state_lock(peer_sk);
+		joiner->authenticated = true;
+		bus_state_unlock(peer_sk);
+
+		spin_lock(&master->bus->lock);
+		hlist_add_head(&joiner->bus_node, &master->bus->peers);
+		spin_unlock(&master->bus->lock);
+
+		ret = 0;
+	}
+
+	/* Drop the reference taken by bus_peer_get(). */
+	sock_put(peer_sk);
+	return ret;
+}
+
+/*
+ * __bus_set_eavesdrop - switch eavesdropping on or off for the peer of @sk
+ * @sk: master-side socket whose connected peer is updated
+ * @eavesdrop: new eavesdropper state for the peer
+ *
+ * The bus-wide eavesdropper counter is only adjusted when the peer's
+ * state actually changes.
+ *
+ * Return: 0 on success, -ENOTCONN when there is no peer, @sk is not
+ * established or the peer has been (partially) shut down, -EINVAL when
+ * @sk is not on the master side or the peer is not authenticated.
+ */
+static int __bus_set_eavesdrop(struct sock *sk, bool eavesdrop)
+{
+	struct sock *peer = bus_peer_get(sk);
+	struct bus_sock *u = bus_sk(sk), *peeru;
+	int err = 0;
+
+	if (!peer)
+		return -ENOTCONN;
+
+	if (sk->sk_state != BUS_ESTABLISHED) {
+		err = -ENOTCONN;
+		goto sock_put_out;
+	}
+
+	peeru = bus_sk(peer);
+
+	if (!u->bus_master_side || !peeru->authenticated) {
+		err = -EINVAL;
+		goto sock_put_out;
+	}
+
+	if (peer->sk_shutdown != 0) {
+		err = -ENOTCONN;
+		goto sock_put_out;
+	}
+
+	/*
+	 * The state lock helpers are handed a struct sock * at every other
+	 * call site, so pass the peer's sock here, not its bus_sock.
+	 */
+	bus_state_lock(peer);
+	if (peeru->eavesdropper != eavesdrop) {
+		peeru->eavesdropper = eavesdrop;
+		if (eavesdrop)
+			atomic64_inc(&u->bus->eavesdropper_cnt);
+		else
+			atomic64_dec(&u->bus->eavesdropper_cnt);
+	}
+	bus_state_unlock(peer);
+
+sock_put_out:
+	/* Drop the reference taken by bus_peer_get(). */
+	sock_put(peer);
+	return err;
+}
+
+/* Turn eavesdropping on for the peer of @sk; see __bus_set_eavesdrop(). */
+static int bus_set_eavesdrop(struct sock *sk)
+{
+	return __bus_set_eavesdrop(sk, true);
+}
+
+/* Turn eavesdropping off for the peer of @sk; see __bus_set_eavesdrop(). */
+static int bus_unset_eavesdrop(struct sock *sk)
+{
+	return __bus_set_eavesdrop(sk, false);
+}
+
+/* Store @sndbuf in sk->sk_sndbuf while holding the socket state lock. */
+static inline void sk_sendbuf_set(struct sock *sk, int sndbuf)
+{
+	bus_state_lock(sk);
+	sk->sk_sndbuf = sndbuf;
+	bus_state_unlock(sk);
+}
+
+/* Store @qlen in sk->sk_max_ack_backlog while holding the state lock. */
+static inline void sk_maxqlen_set(struct sock *sk, int qlen)
+{
+	bus_state_lock(sk);
+	sk->sk_max_ack_backlog = qlen;
+	bus_state_unlock(sk);
+}
+
+/*
+ * bus_setsockopt - handle SOL_BUS socket options
+ * @sock: socket the option is applied to
+ * @level: option level, must be SOL_BUS
+ * @optname: one of the BUS_* option names handled below
+ * @optval: user-space pointer to the option value
+ * @optlen: size of the user-space buffer
+ *
+ * BUS_ADD_ADDR/BUS_DEL_ADDR take a struct bus_addr and act on the peer
+ * of a master-side socket. BUS_SET_SENDBUF/BUS_SET_MAXQLEN take an int
+ * and are only accepted on a listening socket.
+ *
+ * Return: 0 on success or a negative errno.
+ */
+static int bus_setsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, unsigned int optlen)
+{
+	struct bus_addr addr;
+	struct sock *peer;
+	int res;
+	int val;
+
+	if (level != SOL_BUS)
+		return -ENOPROTOOPT;
+
+	switch (optname) {
+	case BUS_ADD_ADDR:
+	case BUS_DEL_ADDR:
+		if (optlen < sizeof(struct bus_addr))
+			return -EINVAL;
+
+		if (!bus_sk(sock->sk)->bus_master_side)
+			return -EINVAL;
+
+		if (copy_from_user(&addr, optval, sizeof(struct bus_addr)))
+			return -EFAULT;
+
+		/*
+		 * The peer may already be gone; never hand a NULL socket
+		 * down. On success the reference taken here is consumed by
+		 * bus_add_addr()/bus_del_addr().
+		 */
+		peer = bus_peer_get(sock->sk);
+		if (!peer)
+			return -ENOTCONN;
+
+		if (optname == BUS_ADD_ADDR)
+			res = bus_add_addr(peer, &addr);
+		else
+			res = bus_del_addr(peer, &addr);
+		break;
+	case BUS_JOIN_BUS:
+		res = bus_join_bus(sock->sk);
+		break;
+	case BUS_SET_EAVESDROP:
+		res = bus_set_eavesdrop(sock->sk);
+		break;
+	case BUS_UNSET_EAVESDROP:
+		res = bus_unset_eavesdrop(sock->sk);
+		break;
+	case BUS_SET_SENDBUF:
+	case BUS_SET_MAXQLEN:
+		if (sock->sk->sk_state != BUS_LISTEN) {
+			res = -EINVAL;
+			break;
+		}
+
+		/*
+		 * optlen is caller-controlled: insist on at least an int and
+		 * copy exactly sizeof(val) bytes so the on-stack value can
+		 * neither be overrun nor left partially initialized.
+		 */
+		if (optlen < sizeof(val)) {
+			res = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(&val, optval, sizeof(val))) {
+			res = -EFAULT;
+			break;
+		}
+
+		res = 0;
+
+		if (optname == BUS_SET_SENDBUF)
+			sk_sendbuf_set(sock->sk, val);
+		else
+			sk_maxqlen_set(sock->sk, val);
+		break;
+	default:
+		res = -EINVAL;
+		break;
+	}
+
+	return res;
+}
+
+/*
+ * Sum of the lengths of all skbs currently sitting on the receive queue
+ * of @sk, or -EINVAL for a listening socket.
+ */
+long bus_inq_len(struct sock *sk)
+{
+	struct sk_buff_head *queue = &sk->sk_receive_queue;
+	struct sk_buff *skb;
+	long total = 0;
+
+	if (sk->sk_state == BUS_LISTEN)
+		return -EINVAL;
+
+	/* Walk the queue under its own lock. */
+	spin_lock(&queue->lock);
+	skb_queue_walk(queue, skb)
+		total += skb->len;
+	spin_unlock(&queue->lock);
+
+	return total;
+}
+EXPORT_SYMBOL_GPL(bus_inq_len);
+
+/* Report the write memory currently accounted to @sk (sk_wmem_alloc_get). */
+long bus_outq_len(struct sock *sk)
+{
+	return sk_wmem_alloc_get(sk);
+}
+EXPORT_SYMBOL_GPL(bus_outq_len);
+
+/*
+ * ioctl handler: SIOCOUTQ and SIOCINQ store the outgoing/incoming queue
+ * length as an int at the user address @arg; every other command yields
+ * -ENOIOCTLCMD.
+ */
+static int bus_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	int __user *uarg = (int __user *)arg;
+	struct sock *sk = sock->sk;
+	long bytes;
+
+	if (cmd == SIOCOUTQ) {
+		bytes = bus_outq_len(sk);
+		return put_user(bytes, uarg);
+	}
+
+	if (cmd == SIOCINQ) {
+		bytes = bus_inq_len(sk);
+		/* A negative length is an error code, passed through as is. */
+		if (bytes < 0)
+			return bytes;
+		return put_user(bytes, uarg);
+	}
+
+	return -ENOIOCTLCMD;
+}
+
+/*
+ * poll handler. Builds the event mask from the socket's error, shutdown,
+ * receive-queue and state fields. Writability is only evaluated when the
+ * caller asked for write events; it requires bus_writable(), a peer whose
+ * receive queue is not full and, for an authenticated non-master socket,
+ * that no peer on the bus has a full receive queue.
+ */
+static unsigned int bus_poll(struct file *file, struct socket *sock,
+				    poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct bus_sock *self = bus_sk(sk);
+	struct bus_sock *member;
+	struct hlist_node *node;
+	struct sock *peer;
+	unsigned int can_write;
+	unsigned int mask = 0;
+
+	sock_poll_wait(file, sk_sleep(sk), wait);
+
+	/* exceptional events? */
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* Connection-based need to check for termination and startup */
+	if (sk->sk_state == BUS_CLOSE)
+		mask |= POLLHUP;
+
+	/* No write status requested, avoid expensive OUT tests. */
+	if ((poll_requested_events(wait) &
+	     (POLLWRBAND | POLLWRNORM | POLLOUT)) == 0)
+		return mask;
+
+	can_write = bus_writable(sk);
+
+	peer = bus_peer_get(sk);
+	if (peer) {
+		if (bus_recvq_full(peer))
+			can_write = 0;
+		sock_put(peer);
+	}
+
+	/*
+	 * Once the socket has joined the bus, one full receive queue
+	 * anywhere on the bus is enough to make it non-writable.
+	 */
+	if (!self->bus_master_side && self->authenticated) {
+		spin_lock(&self->bus->lock);
+		hlist_for_each_entry(member, node, &self->bus->peers,
+				     bus_node) {
+			if (bus_recvq_full(&member->sk)) {
+				can_write = 0;
+				break;
+			}
+		}
+		spin_unlock(&self->bus->lock);
+	}
+
+	if (!can_write) {
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		return mask;
+	}
+
+	return mask | POLLOUT | POLLWRNORM | POLLWRBAND;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Return the head socket of the first non-empty chain in
+ * bus_socket_table, or NULL when every chain is empty. *i is left at the
+ * index of the chain that was found (BUS_HASH_SIZE + 1 when none was).
+ */
+static struct sock *first_bus_socket(int *i)
+{
+	int bucket;
+
+	for (bucket = 0; bucket <= BUS_HASH_SIZE; bucket++) {
+		if (hlist_empty(&bus_socket_table[bucket]))
+			continue;
+
+		*i = bucket;
+		return __sk_head(&bus_socket_table[bucket]);
+	}
+
+	*i = bucket;
+	return NULL;
+}
+
+/*
+ * Return the socket following @s: the next entry of the same chain when
+ * there is one, otherwise the head of the next non-empty chain after
+ * index *i. *i is advanced to the chain that was found, or past the last
+ * chain when the walk is over (NULL is returned then).
+ */
+static struct sock *next_bus_socket(int *i, struct sock *s)
+{
+	struct sock *successor = sk_next(s);
+	int bucket;
+
+	/* Still inside the current chain. */
+	if (successor)
+		return successor;
+
+	/* Chain exhausted: scan the remaining ones. */
+	for (bucket = *i + 1; bucket <= BUS_HASH_SIZE; bucket++) {
+		if (hlist_empty(&bus_socket_table[bucket]))
+			continue;
+
+		*i = bucket;
+		return __sk_head(&bus_socket_table[bucket]);
+	}
+
+	*i = bucket;
+	return NULL;
+}
+
+/* Per-open iteration state for the seq_file walk over bus sockets. */
+struct bus_iter_state {
+	struct seq_net_private p;
+	/* index of the bus_socket_table chain currently being walked */
+	int i;
+};
+
+/*
+ * Return the socket at position @pos (zero based) among the sockets that
+ * belong to the network namespace of @seq, or NULL when there are fewer.
+ */
+static struct sock *bus_seq_idx(struct seq_file *seq, loff_t pos)
+{
+	struct bus_iter_state *iter = seq->private;
+	struct sock *cur = first_bus_socket(&iter->i);
+	loff_t seen = 0;
+
+	while (cur) {
+		/* Only sockets of this namespace count towards @pos. */
+		if (sock_net(cur) == seq_file_net(seq)) {
+			if (seen == pos)
+				return cur;
+			seen++;
+		}
+		cur = next_bus_socket(&iter->i, cur);
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file start: take bus_table_lock and return the element for *pos.
+ * Position 0 stands for the header line (SEQ_START_TOKEN), position N > 0
+ * for socket N - 1.
+ */
+static void *bus_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(bus_table_lock)
+{
+	spin_lock(&bus_table_lock);
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	return bus_seq_idx(seq, *pos - 1);
+}
+
+/*
+ * seq_file next: advance *pos and return the next socket that belongs to
+ * the network namespace of @seq, or NULL at the end of the table.
+ */
+static void *bus_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bus_iter_state *iter = seq->private;
+	struct sock *cur;
+
+	++*pos;
+
+	/* After the header line the walk starts at the first socket. */
+	if (v == SEQ_START_TOKEN)
+		cur = first_bus_socket(&iter->i);
+	else
+		cur = next_bus_socket(&iter->i, v);
+
+	/* Skip sockets living in other namespaces. */
+	while (cur && sock_net(cur) != seq_file_net(seq))
+		cur = next_bus_socket(&iter->i, cur);
+
+	return cur;
+}
+
+/* seq_file stop: release bus_table_lock taken in bus_seq_start(). */
+static void bus_seq_stop(struct seq_file *seq, void *v)
+	__releases(bus_table_lock)
+{
+	spin_unlock(&bus_table_lock);
+}
+
+/*
+ * seq_file show: print the column header for SEQ_START_TOKEN, otherwise
+ * one line describing the socket in @v (pointer, refcount, flags, type,
+ * state, inode and, when the socket has an address, its path).
+ */
+static int bus_seq_show(struct seq_file *seq, void *v)
+{
+	struct sock *s = v;
+	struct bus_sock *u;
+	int state;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Num       RefCount Protocol Flags    Type St " \
+			 "Inode Path\n");
+		return 0;
+	}
+
+	u = bus_sk(s);
+	bus_state_lock(s);
+
+	/* Pick the SS_* value from sk_state and sk_socket being set. */
+	if (s->sk_socket)
+		state = (s->sk_state == BUS_ESTABLISHED) ?
+			SS_CONNECTED : SS_UNCONNECTED;
+	else
+		state = (s->sk_state == BUS_ESTABLISHED) ?
+			SS_CONNECTING : SS_DISCONNECTING;
+
+	seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
+		   s,
+		   atomic_read(&s->sk_refcnt),
+		   0,
+		   s->sk_state == BUS_LISTEN ? __SO_ACCEPTCON : 0,
+		   s->sk_type,
+		   state,
+		   sock_i_ino(s));
+
+	if (u->addr) {
+		int len = u->addr->len - sizeof(short);
+		int i = 0;
+
+		seq_putc(seq, ' ');
+
+		if (BUS_ABSTRACT(s)) {
+			/* Abstract name: '@' is printed for its first byte. */
+			seq_putc(seq, '@');
+			i = 1;
+		} else {
+			len--;
+		}
+
+		for ( ; i < len; i++)
+			seq_putc(seq, u->addr->name->sbus_path[i]);
+	}
+
+	bus_state_unlock(s);
+	seq_putc(seq, '\n');
+
+	return 0;
+}
+
+/* seq_file iterator callbacks for listing bus sockets. */
+static const struct seq_operations bus_seq_ops = {
+	.start  = bus_seq_start,
+	.next   = bus_seq_next,
+	.stop   = bus_seq_stop,
+	.show   = bus_seq_show,
+};
+
+/*
+ * open handler: set up a namespace-aware seq_file using bus_seq_ops with
+ * a struct bus_iter_state as its private data.
+ */
+static int bus_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &bus_seq_ops,
+			    sizeof(struct bus_iter_state));
+}
+
+/* File operations for the seq_file based listing of bus sockets. */
+static const struct file_operations bus_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= bus_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
+};
+
+#endif
+
+/* Socket family descriptor: PF_BUS sockets are created by bus_create(). */
+static const struct net_proto_family bus_family_ops = {
+	.family = PF_BUS,
+	.create = bus_create,
+	.owner	= THIS_MODULE,
+};
+
+/*
+ * Module init: register the bus protocol and then the PF_BUS socket
+ * family. If the family cannot be registered the protocol registration
+ * is rolled back so that nothing is left behind.
+ *
+ * Return: 0 on success, otherwise the error from proto_register() or
+ * sock_register().
+ */
+static int __init af_bus_init(void)
+{
+	int rc;
+	struct sk_buff *dummy_skb;
+
+	/* The per-skb bus parameters must fit into the skb control block. */
+	BUILD_BUG_ON(sizeof(struct bus_skb_parms) > sizeof(dummy_skb->cb));
+
+	rc = proto_register(&bus_proto, 1);
+	if (rc != 0) {
+		pr_crit("%s: Cannot create bus_sock SLAB cache!\n", __func__);
+		return rc;
+	}
+
+	rc = sock_register(&bus_family_ops);
+	if (rc != 0) {
+		pr_crit("%s: Cannot register PF_BUS socket family!\n",
+			__func__);
+		proto_unregister(&bus_proto);
+	}
+
+	return rc;
+}
+
+/* Module exit: undo af_bus_init() in reverse order. */
+static void __exit af_bus_exit(void)
+{
+	sock_unregister(PF_BUS);
+	proto_unregister(&bus_proto);
+}
+
+/* Module entry/exit hooks and metadata. */
+module_init(af_bus_init);
+module_exit(af_bus_exit);
+
+MODULE_AUTHOR("Alban Crequy, Javier Martinez Canillas");
+MODULE_DESCRIPTION("Linux Bus domain sockets");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_BUS);
-- 
1.7.10

^ permalink raw reply related

* [PATCH net-next 07/15] scm: allow AF_BUS sockets to send ancillary data
From: Vincent Sanders @ 2012-06-29 16:45 UTC (permalink / raw)
  To: netdev, linux-kernel, David S. Miller
  Cc: Javier Martinez Canillas, Vincent Sanders
In-Reply-To: <1340988354-26981-1-git-send-email-vincent.sanders@collabora.co.uk>

From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>

Similar to UNIX domain sockets, AF_BUS sockets support passing file
descriptors and process credentials, which requires support for
passing control messages.

The core socket-level control message processing needs to be extended
to allow sockets other than PF_UNIX to send SCM_RIGHTS type messages.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Vincent Sanders <vincent.sanders@collabora.co.uk>
---
 net/core/scm.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/scm.c b/net/core/scm.c
index 611c5ef..87e3152 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -158,7 +158,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 		switch (cmsg->cmsg_type)
 		{
 		case SCM_RIGHTS:
-			if (!sock->ops || sock->ops->family != PF_UNIX)
+			if (!sock->ops || (sock->ops->family != PF_UNIX &&
+					   sock->ops->family != PF_BUS))
 				goto error;
 			err=scm_fp_copy(cmsg, &p->fp);
 			if (err<0)
-- 
1.7.10

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox