Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 3/3] drivers/net/wireless/b43: Rename b43_debug to b43_debugging
From: Joe Perches @ 2010-06-19 23:30 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: John W. Linville, linux-wireless, netdev, linux-kernel
In-Reply-To: <cover.1276988387.git.joe@perches.com>

Avoid using <foo>_debug function name because that is most commonly
used to emit logging messages.

Signed-off-by: Joe Perches <joe@perches.com>
---
 drivers/net/wireless/b43/debugfs.c |    2 +-
 drivers/net/wireless/b43/debugfs.h |    4 ++--
 drivers/net/wireless/b43/dma.c     |   10 +++++-----
 drivers/net/wireless/b43/lo.c      |    6 +++---
 drivers/net/wireless/b43/main.c    |   16 ++++++++--------
 drivers/net/wireless/b43/phy_g.c   |    8 ++++----
 6 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/net/wireless/b43/debugfs.c b/drivers/net/wireless/b43/debugfs.c
index 307802c..a9f8d48 100644
--- a/drivers/net/wireless/b43/debugfs.c
+++ b/drivers/net/wireless/b43/debugfs.c
@@ -645,7 +645,7 @@ B43_DEBUGFS_FOPS(restart, NULL, restart_write_file);
 B43_DEBUGFS_FOPS(loctls, loctls_read_file, NULL);
 
 
-bool b43_debug(struct b43_wldev *dev, enum b43_dyndbg feature)
+bool b43_debugging(struct b43_wldev *dev, enum b43_dyndbg feature)
 {
 	bool enabled;
 
diff --git a/drivers/net/wireless/b43/debugfs.h b/drivers/net/wireless/b43/debugfs.h
index 822aad8..f34da88 100644
--- a/drivers/net/wireless/b43/debugfs.h
+++ b/drivers/net/wireless/b43/debugfs.h
@@ -73,7 +73,7 @@ struct b43_dfsentry {
 	struct dentry *dyn_debug_dentries[__B43_NR_DYNDBG];
 };
 
-bool b43_debug(struct b43_wldev *dev, enum b43_dyndbg feature);
+bool b43_debugging(struct b43_wldev *dev, enum b43_dyndbg feature);
 
 void b43_debugfs_init(void);
 void b43_debugfs_exit(void);
@@ -84,7 +84,7 @@ void b43_debugfs_log_txstat(struct b43_wldev *dev,
 
 #else /* CONFIG_B43_DEBUG */
 
-static inline bool b43_debug(struct b43_wldev *dev, enum b43_dyndbg feature)
+static inline bool b43_debugging(struct b43_wldev *dev, enum b43_dyndbg feature)
 {
 	return 0;
 }
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index a6d15c4..6343378 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -265,7 +265,7 @@ static void update_max_used_slots(struct b43_dmaring *ring,
 	if (current_used_slots <= ring->max_used_slots)
 		return;
 	ring->max_used_slots = current_used_slots;
-	if (b43_debug(ring->dev, B43_DBG_DMAVERBOSE)) {
+	if (b43_debugging(ring->dev, B43_DBG_DMAVERBOSE)) {
 		b43_dbg(ring->dev->wl,
 			"max_used_slots increased to %d on %s ring %d\n",
 			ring->max_used_slots,
@@ -1264,7 +1264,7 @@ out_unmap_hdr:
 static inline int should_inject_overflow(struct b43_dmaring *ring)
 {
 #ifdef CONFIG_B43_DEBUG
-	if (unlikely(b43_debug(ring->dev, B43_DBG_DMAOVERFLOW))) {
+	if (unlikely(b43_debugging(ring->dev, B43_DBG_DMAOVERFLOW))) {
 		/* Check if we should inject another ringbuffer overflow
 		 * to test handling of this situation in the stack. */
 		unsigned long next_overflow;
@@ -1340,7 +1340,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
 		 * Because of a race, one packet may be queued after
 		 * the queue is stopped, thus we got called when we shouldn't.
 		 * For now, just refuse the transmit. */
-		if (b43_debug(dev, B43_DBG_DMAVERBOSE))
+		if (b43_debugging(dev, B43_DBG_DMAVERBOSE))
 			b43_err(dev->wl, "Packet after queue stopped\n");
 		err = -ENOSPC;
 		goto out;
@@ -1376,7 +1376,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
 		/* This TX ring is full. */
 		ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
 		ring->stopped = 1;
-		if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
+		if (b43_debugging(dev, B43_DBG_DMAVERBOSE)) {
 			b43_dbg(dev->wl, "Stopped TX ring %d\n", ring->index);
 		}
 	}
@@ -1494,7 +1494,7 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
 		B43_WARN_ON(free_slots(ring) < TX_SLOTS_PER_FRAME);
 		ieee80211_wake_queue(dev->wl->hw, ring->queue_prio);
 		ring->stopped = 0;
-		if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
+		if (b43_debugging(dev, B43_DBG_DMAVERBOSE)) {
 			b43_dbg(dev->wl, "Woke up TX ring %d\n", ring->index);
 		}
 	}
diff --git a/drivers/net/wireless/b43/lo.c b/drivers/net/wireless/b43/lo.c
index c16c35c..b047edd 100644
--- a/drivers/net/wireless/b43/lo.c
+++ b/drivers/net/wireless/b43/lo.c
@@ -774,7 +774,7 @@ struct b43_lo_calib *b43_calibrate_lo_setting(struct b43_wldev *dev,
 	lo_measure_restore(dev, &saved_regs);
 	b43_mac_enable(dev);
 
-	if (b43_debug(dev, B43_DBG_LO)) {
+	if (b43_debugging(dev, B43_DBG_LO)) {
 		b43_dbg(dev->wl, "LO: Calibrated for BB(%u), RF(%u,%u) "
 			"=> I=%d Q=%d\n",
 			bbatt->att, rfatt->att, rfatt->with_padmix,
@@ -971,7 +971,7 @@ void b43_lo_g_maintanance_work(struct b43_wldev *dev)
 			B43_WARN_ON(current_item_expired);
 			current_item_expired = 1;
 		}
-		if (b43_debug(dev, B43_DBG_LO)) {
+		if (b43_debugging(dev, B43_DBG_LO)) {
 			b43_dbg(dev->wl, "LO: Item BB(%u), RF(%u,%u), "
 				"I=%d, Q=%d expired\n",
 				cal->bbatt.att, cal->rfatt.att,
@@ -983,7 +983,7 @@ void b43_lo_g_maintanance_work(struct b43_wldev *dev)
 	}
 	if (current_item_expired || unlikely(list_empty(&lo->calib_list))) {
 		/* Recalibrate currently used LO setting. */
-		if (b43_debug(dev, B43_DBG_LO))
+		if (b43_debugging(dev, B43_DBG_LO))
 			b43_dbg(dev->wl, "LO: Recalibrating current LO setting\n");
 		cal = b43_calibrate_lo_setting(dev, &gphy->bbatt, &gphy->rfatt);
 		if (cal) {
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 6343873..3d2c655 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -839,9 +839,9 @@ static void rx_tkip_phase1_write(struct b43_wldev *dev, u8 index, u32 iv32,
 	index -= pairwise_keys_start;
 	B43_WARN_ON(index >= B43_NR_PAIRWISE_KEYS);
 
-	if (b43_debug(dev, B43_DBG_KEYS)) {
+	if (b43_debugging(dev, B43_DBG_KEYS)) {
 		b43_dbg(dev->wl, "rx_tkip_phase1_write : idx 0x%x, iv32 0x%x\n",
-				index, iv32);
+			index, iv32);
 	}
 	/* Write the key to the  RX tkip shared mem */
 	offset = B43_SHM_SH_TKIPTSCTTAK + index * (10 + 4);
@@ -1014,7 +1014,7 @@ static void b43_dump_keymemory(struct b43_wldev *dev)
 	u64 hf;
 	struct b43_key *key;
 
-	if (!b43_debug(dev, B43_DBG_KEYS))
+	if (!b43_debugging(dev, B43_DBG_KEYS))
 		return;
 
 	hf = b43_hf_read(dev);
@@ -1857,7 +1857,7 @@ static void b43_do_interrupt_thread(struct b43_wldev *dev)
 	b43_write32(dev, B43_MMIO_GEN_IRQ_MASK, dev->irq_mask);
 
 #if B43_DEBUG
-	if (b43_debug(dev, B43_DBG_VERBOSESTATS)) {
+	if (b43_debugging(dev, B43_DBG_VERBOSESTATS)) {
 		dev->irq_count++;
 		for (i = 0; i < ARRAY_SIZE(dev->irq_bit_count); i++) {
 			if (reason & (1 << i))
@@ -2587,7 +2587,7 @@ static void b43_gpio_cleanup(struct b43_wldev *dev)
 /* http://bcm-specs.sipsolutions.net/EnableMac */
 void b43_mac_enable(struct b43_wldev *dev)
 {
-	if (b43_debug(dev, B43_DBG_FIRMWARE)) {
+	if (b43_debugging(dev, B43_DBG_FIRMWARE)) {
 		u16 fwstate;
 
 		fwstate = b43_shm_read16(dev, B43_SHM_SHARED,
@@ -2944,7 +2944,7 @@ static void b43_periodic_every15sec(struct b43_wldev *dev)
 	wmb();
 
 #if B43_DEBUG
-	if (b43_debug(dev, B43_DBG_VERBOSESTATS)) {
+	if (b43_debugging(dev, B43_DBG_VERBOSESTATS)) {
 		unsigned int i;
 
 		b43_dbg(dev->wl, "Stats: %7u IRQs/sec, %7u TX/sec, %7u RX/sec\n",
@@ -2993,14 +2993,14 @@ static void b43_periodic_work_handler(struct work_struct *work)
 
 	if (unlikely(b43_status(dev) != B43_STAT_STARTED))
 		goto out;
-	if (b43_debug(dev, B43_DBG_PWORK_STOP))
+	if (b43_debugging(dev, B43_DBG_PWORK_STOP))
 		goto out_requeue;
 
 	do_periodic_work(dev);
 
 	dev->periodic_state++;
 out_requeue:
-	if (b43_debug(dev, B43_DBG_PWORK_FAST))
+	if (b43_debugging(dev, B43_DBG_PWORK_FAST))
 		delay = msecs_to_jiffies(50);
 	else
 		delay = round_jiffies_relative(HZ * 15);
diff --git a/drivers/net/wireless/b43/phy_g.c b/drivers/net/wireless/b43/phy_g.c
index acadae6..ef539a5 100644
--- a/drivers/net/wireless/b43/phy_g.c
+++ b/drivers/net/wireless/b43/phy_g.c
@@ -236,7 +236,7 @@ static void b43_set_txpower_g(struct b43_wldev *dev,
 	gphy->rfatt.with_padmix = !!(tx_control & B43_TXCTL_TXMIX);
 	memmove(&gphy->bbatt, bbatt, sizeof(*bbatt));
 
-	if (b43_debug(dev, B43_DBG_XMITPOWER)) {
+	if (b43_debugging(dev, B43_DBG_XMITPOWER)) {
 		b43_dbg(dev->wl, "Tuning TX-power to bbatt(%u), "
 			"rfatt(%u), tx_control(0x%02X), "
 			"tx_bias(0x%02X), tx_magn(0x%02X)\n",
@@ -2862,7 +2862,7 @@ static void b43_gphy_op_adjust_txpower(struct b43_wldev *dev)
 	gphy->rfatt.att = rfatt;
 	gphy->bbatt.att = bbatt;
 
-	if (b43_debug(dev, B43_DBG_XMITPOWER))
+	if (b43_debugging(dev, B43_DBG_XMITPOWER))
 		b43_dbg(dev->wl, "Adjusting TX power\n");
 
 	/* Adjust the hardware */
@@ -2929,7 +2929,7 @@ static enum b43_txpwr_result b43_gphy_op_recalc_txpower(struct b43_wldev *dev,
 		desired_pwr = INT_TO_Q52(phy->desired_txpower);
 	/* And limit it. max_pwr already is Q5.2 */
 	desired_pwr = clamp_val(desired_pwr, 0, max_pwr);
-	if (b43_debug(dev, B43_DBG_XMITPOWER)) {
+	if (b43_debugging(dev, B43_DBG_XMITPOWER)) {
 		b43_dbg(dev->wl,
 			"[TX power]  current = " Q52_FMT
 			" dBm,  desired = " Q52_FMT
@@ -2958,7 +2958,7 @@ static enum b43_txpwr_result b43_gphy_op_recalc_txpower(struct b43_wldev *dev,
 	bbatt_delta -= 4 * rfatt_delta;
 
 #if B43_DEBUG
-	if (b43_debug(dev, B43_DBG_XMITPOWER)) {
+	if (b43_debugging(dev, B43_DBG_XMITPOWER)) {
 		int dbm = pwr_adjust < 0 ? -pwr_adjust : pwr_adjust;
 		b43_dbg(dev->wl,
 			"[TX power deltas]  %s" Q52_FMT " dBm   =>   "
-- 
1.7.1.337.g6068.dirty


^ permalink raw reply related

* Re: [PATCH 0/2] net,man - IP_NODEFRAG option for IPv4 socket
From: Jiri Olsa @ 2010-06-19 23:32 UTC (permalink / raw)
  To: eric.dumazet, jengelh, kaber; +Cc: netdev, netfilter-devel, linux-man
In-Reply-To: <1276600052-16499-1-git-send-email-jolsa@redhat.com>

hi,
any feedback?

thanks,
jirka

On Tue, Jun 15, 2010 at 01:07:30PM +0200, Jiri Olsa wrote:
> hi,
> 
> this series contains patch for IP_NODEFRAG option for RAW sockets,
> and changes for man pages.
> 
> I'm sending this together, hope it's ok.
> 
> 1/2 - net - IP_NODEFRAG option for IPv4 socket
> 2/2 - man - IP_NODEFRAG option for IPv4 socket
> 
> wbr,
> jirka
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH  kernel 2.6.35-rc3] smc91c92_cs: fix the problem that lan & modem does not work simultaneously
From: Ken Kawasaki @ 2010-06-20  1:24 UTC (permalink / raw)
  To: netdev
In-Reply-To: <20100612191728.2d272cc1.ken_kawasaki@spring.nifty.jp>


smc91c92_cs:
  Fix the problem that LAN & modem do not work simultaneously
  in the Megahertz multi-function card.
  We need to write MEGAHERTZ_ISR to retrigger the interrupt.

Signed-off-by: Ken Kawasaki <ken_kawasaki@spring.nifty.jp>

--

--- linux-2.6.35-rc3/drivers/net/pcmcia/smc91c92_cs.c.orig	2010-06-19 09:42:59.000000000 +0900
+++ linux-2.6.35-rc3/drivers/net/pcmcia/smc91c92_cs.c	2010-06-19 11:25:34.000000000 +0900
@@ -1505,12 +1505,20 @@ irq_done:
 	writeb(cor & ~COR_IREQ_ENA, smc->base + MOT_LAN + CISREG_COR);
 	writeb(cor, smc->base + MOT_LAN + CISREG_COR);
     }
-#ifdef DOES_NOT_WORK
-    if (smc->base != NULL) { /* Megahertz MFC's */
-	readb(smc->base+MEGAHERTZ_ISR);
-	readb(smc->base+MEGAHERTZ_ISR);
+
+    if ((smc->base != NULL) &&  /* Megahertz MFC's */
+	(smc->manfid == MANFID_MEGAHERTZ) &&
+	(smc->cardid == PRODID_MEGAHERTZ_EM3288)) {
+
+	u_char tmp;
+	tmp = readb(smc->base+MEGAHERTZ_ISR);
+	tmp = readb(smc->base+MEGAHERTZ_ISR);
+
+	/* Retrigger interrupt if needed */
+	writeb(tmp, smc->base + MEGAHERTZ_ISR);
+	writeb(tmp, smc->base + MEGAHERTZ_ISR);
     }
-#endif
+
     spin_unlock(&smc->lock);
     return IRQ_RETVAL(handled);
 }

^ permalink raw reply

* [PATCH] net: optimize Berkeley Packet Filter (BPF) processing
From: Hagen Paul Pfeifer @ 2010-06-20  3:05 UTC (permalink / raw)
  To: netdev; +Cc: davem, Hagen Paul Pfeifer

Gcc is currently not able to optimize the switch statement in
sk_run_filter() because of sparse case labels. This patch replaces the
OR'd labels with ordered, sequential case labels. The sk_chk_filter()
function is modified to patch/replace the original OPCODES with an
ordered but equivalent form. gcc is now able to transform the
switch statement in sk_run_filter into a jump table of complexity O(1).

Before this patch, gcc generates a sequence of conditional branches (O(n))
with a .text segment size of 567 bytes (arch x86_64):

7ff: 8b 06                 mov    (%rsi),%eax
801: 66 83 f8 35           cmp    $0x35,%ax
805: 0f 84 d0 02 00 00     je     adb <sk_run_filter+0x31d>
80b: 0f 87 07 01 00 00     ja     918 <sk_run_filter+0x15a>
811: 66 83 f8 15           cmp    $0x15,%ax
815: 0f 84 c5 02 00 00     je     ae0 <sk_run_filter+0x322>
81b: 77 73                 ja     890 <sk_run_filter+0xd2>
81d: 66 83 f8 04           cmp    $0x4,%ax
821: 0f 84 17 02 00 00     je     a3e <sk_run_filter+0x280>
827: 77 29                 ja     852 <sk_run_filter+0x94>
829: 66 83 f8 01           cmp    $0x1,%ax
[...]

With the modification the compiler translates the switch statement into
the following jump table fragment:

7ff: 66 83 3e 2c           cmpw   $0x2c,(%rsi)
803: 0f 87 1f 02 00 00     ja     a28 <sk_run_filter+0x26a>
809: 0f b7 06              movzwl (%rsi),%eax
80c: ff 24 c5 00 00 00 00  jmpq   *0x0(,%rax,8)
813: 44 89 e3              mov    %r12d,%ebx
816: e9 43 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>
81b: 41 89 dc              mov    %ebx,%r12d
81e: e9 3b 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>

Furthermore, I reordered the instructions to reduce cache line misses by
placing the most common instructions first.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
---
 include/linux/filter.h |   48 +++++++++++
 net/core/filter.c      |  212 ++++++++++++++++++++++++++++++++++++------------
 2 files changed, 209 insertions(+), 51 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d7..69b43db 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -91,6 +91,54 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define         BPF_TAX         0x00
 #define         BPF_TXA         0x80
 
+enum {
+	BPF_S_RET_K = 0,
+	BPF_S_RET_A,
+	BPF_S_ALU_ADD_K,
+	BPF_S_ALU_ADD_X,
+	BPF_S_ALU_SUB_K,
+	BPF_S_ALU_SUB_X,
+	BPF_S_ALU_MUL_K,
+	BPF_S_ALU_MUL_X,
+	BPF_S_ALU_DIV_X,
+	BPF_S_ALU_AND_K,
+	BPF_S_ALU_AND_X,
+	BPF_S_ALU_OR_K,
+	BPF_S_ALU_OR_X,
+	BPF_S_ALU_LSH_K,
+	BPF_S_ALU_LSH_X,
+	BPF_S_ALU_RSH_K,
+	BPF_S_ALU_RSH_X,
+	BPF_S_ALU_NEG,
+	BPF_S_LD_W_ABS,
+	BPF_S_LD_H_ABS,
+	BPF_S_LD_B_ABS,
+	BPF_S_LD_W_LEN,
+	BPF_S_LD_W_IND,
+	BPF_S_LD_H_IND,
+	BPF_S_LD_B_IND,
+	BPF_S_LD_IMM,
+	BPF_S_LDX_W_LEN,
+	BPF_S_LDX_B_MSH,
+	BPF_S_LDX_IMM,
+	BPF_S_MISC_TAX,
+	BPF_S_MISC_TXA,
+	BPF_S_ALU_DIV_K,
+	BPF_S_LD_MEM,
+	BPF_S_LDX_MEM,
+	BPF_S_ST,
+	BPF_S_STX,
+	BPF_S_JMP_JA,
+	BPF_S_JMP_JEQ_K,
+	BPF_S_JMP_JEQ_X,
+	BPF_S_JMP_JGE_K,
+	BPF_S_JMP_JGE_X,
+	BPF_S_JMP_JGT_K,
+	BPF_S_JMP_JGT_X,
+	BPF_S_JMP_JSET_K,
+	BPF_S_JMP_JSET_X,
+};
+
 #ifndef BPF_MAXINSNS
 #define BPF_MAXINSNS 4096
 #endif
diff --git a/net/core/filter.c b/net/core/filter.c
index da69fb7..6e3e322 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 		fentry = &filter[pc];
 
 		switch (fentry->code) {
-		case BPF_ALU|BPF_ADD|BPF_X:
+		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
-		case BPF_ALU|BPF_ADD|BPF_K:
+		case BPF_S_ALU_ADD_K:
 			A += fentry->k;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_X:
+		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_K:
+		case BPF_S_ALU_SUB_K:
 			A -= fentry->k;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_X:
+		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_K:
+		case BPF_S_ALU_MUL_K:
 			A *= fentry->k;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_X:
+		case BPF_S_ALU_DIV_X:
 			if (X == 0)
 				return 0;
 			A /= X;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_K:
+		case BPF_S_ALU_DIV_K:
 			A /= fentry->k;
 			continue;
-		case BPF_ALU|BPF_AND|BPF_X:
+		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
-		case BPF_ALU|BPF_AND|BPF_K:
+		case BPF_S_ALU_AND_K:
 			A &= fentry->k;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_X:
+		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_K:
+		case BPF_S_ALU_OR_K:
 			A |= fentry->k;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_X:
+		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_K:
+		case BPF_S_ALU_LSH_K:
 			A <<= fentry->k;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_X:
+		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_K:
+		case BPF_S_ALU_RSH_K:
 			A >>= fentry->k;
 			continue;
-		case BPF_ALU|BPF_NEG:
+		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
-		case BPF_JMP|BPF_JA:
+		case BPF_S_JMP_JA:
 			pc += fentry->k;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_K:
+		case BPF_S_JMP_JGT_K:
 			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_K:
+		case BPF_S_JMP_JGE_K:
 			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_K:
+		case BPF_S_JMP_JEQ_K:
 			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_K:
+		case BPF_S_JMP_JSET_K:
 			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_X:
+		case BPF_S_JMP_JGT_X:
 			pc += (A > X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_X:
+		case BPF_S_JMP_JGE_X:
 			pc += (A >= X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_X:
+		case BPF_S_JMP_JEQ_X:
 			pc += (A == X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_X:
+		case BPF_S_JMP_JSET_X:
 			pc += (A & X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_LD|BPF_W|BPF_ABS:
+		case BPF_S_LD_W_ABS:
 			k = fentry->k;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
@@ -217,7 +217,7 @@ load_w:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_H|BPF_ABS:
+		case BPF_S_LD_H_ABS:
 			k = fentry->k;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
@@ -226,7 +226,7 @@ load_h:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_B|BPF_ABS:
+		case BPF_S_LD_B_ABS:
 			k = fentry->k;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
@@ -235,54 +235,54 @@ load_b:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_W|BPF_LEN:
+		case BPF_S_LD_W_LEN:
 			A = skb->len;
 			continue;
-		case BPF_LDX|BPF_W|BPF_LEN:
+		case BPF_S_LDX_W_LEN:
 			X = skb->len;
 			continue;
-		case BPF_LD|BPF_W|BPF_IND:
+		case BPF_S_LD_W_IND:
 			k = X + fentry->k;
 			goto load_w;
-		case BPF_LD|BPF_H|BPF_IND:
+		case BPF_S_LD_H_IND:
 			k = X + fentry->k;
 			goto load_h;
-		case BPF_LD|BPF_B|BPF_IND:
+		case BPF_S_LD_B_IND:
 			k = X + fentry->k;
 			goto load_b;
-		case BPF_LDX|BPF_B|BPF_MSH:
+		case BPF_S_LDX_B_MSH:
 			ptr = load_pointer(skb, fentry->k, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
-		case BPF_LD|BPF_IMM:
+		case BPF_S_LD_IMM:
 			A = fentry->k;
 			continue;
-		case BPF_LDX|BPF_IMM:
+		case BPF_S_LDX_IMM:
 			X = fentry->k;
 			continue;
-		case BPF_LD|BPF_MEM:
+		case BPF_S_LD_MEM:
 			A = mem[fentry->k];
 			continue;
-		case BPF_LDX|BPF_MEM:
+		case BPF_S_LDX_MEM:
 			X = mem[fentry->k];
 			continue;
-		case BPF_MISC|BPF_TAX:
+		case BPF_S_MISC_TAX:
 			X = A;
 			continue;
-		case BPF_MISC|BPF_TXA:
+		case BPF_S_MISC_TXA:
 			A = X;
 			continue;
-		case BPF_RET|BPF_K:
+		case BPF_S_RET_K:
 			return fentry->k;
-		case BPF_RET|BPF_A:
+		case BPF_S_RET_A:
 			return A;
-		case BPF_ST:
+		case BPF_S_ST:
 			mem[fentry->k] = A;
 			continue;
-		case BPF_STX:
+		case BPF_S_STX:
 			mem[fentry->k] = X;
 			continue;
 		default:
@@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 		/* Only allow valid instructions */
 		switch (ftest->code) {
 		case BPF_ALU|BPF_ADD|BPF_K:
+			ftest->code = BPF_S_ALU_ADD_K;
+			break;
 		case BPF_ALU|BPF_ADD|BPF_X:
+			ftest->code = BPF_S_ALU_ADD_X;
+			break;
 		case BPF_ALU|BPF_SUB|BPF_K:
+			ftest->code = BPF_S_ALU_SUB_K;
+			break;
 		case BPF_ALU|BPF_SUB|BPF_X:
+			ftest->code = BPF_S_ALU_SUB_X;
+			break;
 		case BPF_ALU|BPF_MUL|BPF_K:
+			ftest->code = BPF_S_ALU_MUL_K;
+			break;
 		case BPF_ALU|BPF_MUL|BPF_X:
+			ftest->code = BPF_S_ALU_MUL_X;
+			break;
 		case BPF_ALU|BPF_DIV|BPF_X:
+			ftest->code = BPF_S_ALU_DIV_X;
+			break;
 		case BPF_ALU|BPF_AND|BPF_K:
+			ftest->code = BPF_S_ALU_AND_K;
+			break;
 		case BPF_ALU|BPF_AND|BPF_X:
+			ftest->code = BPF_S_ALU_AND_X;
+			break;
 		case BPF_ALU|BPF_OR|BPF_K:
+			ftest->code = BPF_S_ALU_OR_K;
+			break;
 		case BPF_ALU|BPF_OR|BPF_X:
+			ftest->code = BPF_S_ALU_OR_X;
+			break;
 		case BPF_ALU|BPF_LSH|BPF_K:
+			ftest->code = BPF_S_ALU_LSH_K;
+			break;
 		case BPF_ALU|BPF_LSH|BPF_X:
+			ftest->code = BPF_S_ALU_LSH_X;
+			break;
 		case BPF_ALU|BPF_RSH|BPF_K:
+			ftest->code = BPF_S_ALU_RSH_K;
+			break;
 		case BPF_ALU|BPF_RSH|BPF_X:
+			ftest->code = BPF_S_ALU_RSH_X;
+			break;
 		case BPF_ALU|BPF_NEG:
+			ftest->code = BPF_S_ALU_NEG;
+			break;
 		case BPF_LD|BPF_W|BPF_ABS:
+			ftest->code = BPF_S_LD_W_ABS;
+			break;
 		case BPF_LD|BPF_H|BPF_ABS:
+			ftest->code = BPF_S_LD_H_ABS;
+			break;
 		case BPF_LD|BPF_B|BPF_ABS:
+			ftest->code = BPF_S_LD_B_ABS;
+			break;
 		case BPF_LD|BPF_W|BPF_LEN:
+			ftest->code = BPF_S_LD_W_LEN;
+			break;
 		case BPF_LD|BPF_W|BPF_IND:
+			ftest->code = BPF_S_LD_W_IND;
+			break;
 		case BPF_LD|BPF_H|BPF_IND:
+			ftest->code = BPF_S_LD_H_IND;
+			break;
 		case BPF_LD|BPF_B|BPF_IND:
+			ftest->code = BPF_S_LD_B_IND;
+			break;
 		case BPF_LD|BPF_IMM:
+			ftest->code = BPF_S_LD_IMM;
+			break;
 		case BPF_LDX|BPF_W|BPF_LEN:
+			ftest->code = BPF_S_LDX_W_LEN;
+			break;
 		case BPF_LDX|BPF_B|BPF_MSH:
+			ftest->code = BPF_S_LDX_B_MSH;
+			break;
 		case BPF_LDX|BPF_IMM:
+			ftest->code = BPF_S_LDX_IMM;
+			break;
 		case BPF_MISC|BPF_TAX:
+			ftest->code = BPF_S_MISC_TAX;
+			break;
 		case BPF_MISC|BPF_TXA:
+			ftest->code = BPF_S_MISC_TXA;
+			break;
 		case BPF_RET|BPF_K:
+			ftest->code = BPF_S_RET_K;
+			break;
 		case BPF_RET|BPF_A:
+			ftest->code = BPF_S_RET_A;
 			break;
 
 		/* Some instructions need special checks */
 
-		case BPF_ALU|BPF_DIV|BPF_K:
 			/* check for division by zero */
+		case BPF_ALU|BPF_DIV|BPF_K:
 			if (ftest->k == 0)
 				return -EINVAL;
+			ftest->code = BPF_S_ALU_DIV_K;
 			break;
 
+		/* check for invalid memory addresses */
 		case BPF_LD|BPF_MEM:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_LD_MEM;
+			break;
 		case BPF_LDX|BPF_MEM:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_LDX_MEM;
+			break;
 		case BPF_ST:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_ST;
+			break;
 		case BPF_STX:
-			/* check for invalid memory addresses */
 			if (ftest->k >= BPF_MEMWORDS)
 				return -EINVAL;
+			ftest->code = BPF_S_STX;
 			break;
 
 		case BPF_JMP|BPF_JA:
@@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			 */
 			if (ftest->k >= (unsigned)(flen-pc-1))
 				return -EINVAL;
+			ftest->code = BPF_S_JMP_JA;
 			break;
 
 		case BPF_JMP|BPF_JEQ|BPF_K:
+			ftest->code = BPF_S_JMP_JEQ_K;
+			break;
 		case BPF_JMP|BPF_JEQ|BPF_X:
+			ftest->code = BPF_S_JMP_JEQ_X;
+			break;
 		case BPF_JMP|BPF_JGE|BPF_K:
+			ftest->code = BPF_S_JMP_JGE_K;
+			break;
 		case BPF_JMP|BPF_JGE|BPF_X:
+			ftest->code = BPF_S_JMP_JGE_X;
+			break;
 		case BPF_JMP|BPF_JGT|BPF_K:
+			ftest->code = BPF_S_JMP_JGT_K;
+			break;
 		case BPF_JMP|BPF_JGT|BPF_X:
+			ftest->code = BPF_S_JMP_JGT_X;
+			break;
 		case BPF_JMP|BPF_JSET|BPF_K:
+			ftest->code = BPF_S_JMP_JSET_K;
+			break;
 		case BPF_JMP|BPF_JSET|BPF_X:
+			ftest->code = BPF_S_JMP_JSET_X;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
 			/* for conditionals both must be safe */
+		switch (ftest->code) {
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
 			if (pc + ftest->jt + 1 >= flen ||
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
-			break;
+		}
+	}
 
+	/* last instruction must be a RET code */
+	switch (filter[flen - 1].code) {
+	case BPF_S_RET_K:
+	case BPF_S_RET_A:
+		return 0;
+		break;
 		default:
 			return -EINVAL;
 		}
-	}
-
-	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);
 
-- 
1.6.6.196.g1f735.dirty


^ permalink raw reply related

* Re: [PATCH] net: optimize Berkeley Packet Filter (BPF) processing
From: Stephen Hemminger @ 2010-06-20  5:16 UTC (permalink / raw)
  To: Hagen Paul Pfeifer; +Cc: netdev, davem
In-Reply-To: <1277003136-5522-1-git-send-email-hagen@jauu.net>

On Sun, 20 Jun 2010 05:05:36 +0200
Hagen Paul Pfeifer <hagen@jauu.net> wrote:

> Gcc is currenlty not in the ability to optimize the switch statement in
> sk_run_filter() because of dense case labels. This patch replace the
> OR'd labels with ordered sequenced case labels. The sk_chk_filter()
> function is modified to patch/replace the original OPCODES in a
> ordered but equivalent form. gcc is now in the ability to transform the
> switch statement in sk_run_filter into a jump table of complexity O(1).
> 
> Until this patch gcc generates a sequence of conditional branches (O(n) of 567
> byte .text segment size (arch x86_64):

I don't think this works because it breaks ABI compatibility for applications that
use older versions.

^ permalink raw reply

* [PATCH] Driver-core: Always create class directories fixing the broken network drivers.
From: Eric W. Biederman @ 2010-06-20  6:20 UTC (permalink / raw)
  To: Greg KH; +Cc: Johannes Berg, netdev, Kay Sievers
In-Reply-To: <AANLkTin2sci1gSmwx-tjazYGFAcUNRKhJFI7Bt0KvVe3@mail.gmail.com>

In get_device_parent there is a check to not add a class directory
when a class device was put under another class device.  The check was
put in place as a just in case measure to not break old userspace if
any existing code happened to depend on it.  Currently the only known
way that we get a class device under a class device is due to the
rearrangement of devices that happened when the new sysfs layout was
introduced.

With the introduction of tagged sysfs directories for properly
handling network namespace support this omission in creating the class
directories went from a bad thing in terms of namespace pollution, to
actually breaking device_remove.

Currently there are two reported network device drivers that break
because the class directory was not created by the device layer.  The
usb bnep driver and the mac80211_hwsim driver.

Every solution proposed changes the sysfs layout for the affected
devices, and thus has the potential to break userspace.

Since we are changing the sysfs layout anyway, and since we are now
talking about several devices all with the same problem, all caused by
the same over-conservative bit of code, let's kill that bit of code.

There have been other proposals to fix this but they all have been
more complicated, and none of them have actually resulted in working
code.

Any userspace that works with both the old and the new sysfs layouts
should not be affected by this change, and even if someone depends
on it we are talking a very small number of drivers overall that
are affected.

My apologies for not fully catching this hole in the logic
when this code was originally added.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 drivers/base/core.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 9630fbd..7b1c4d4 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -673,8 +673,6 @@ static struct kobject *get_device_parent(struct device *dev,
 		 */
 		if (parent == NULL)
 			parent_kobj = virtual_device_parent(dev);
-		else if (parent->class)
-			return &parent->kobj;
 		else
 			parent_kobj = &parent->kobj;

-- 
1.6.5.2.143.g8cc62

^ permalink raw reply related

* Re: [net-next-2.6 PATCH 1/8] e1000e: cleanup ethtool loopback setup code
From: Jeff Kirsher @ 2010-06-20  7:32 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, gospo, bphilips, bruce.w.allan
In-Reply-To: <20100618.221512.102550313.davem@davemloft.net>

On Fri, Jun 18, 2010 at 22:15, David Miller <davem@davemloft.net> wrote:
>
> I've applied this series however:
>
> 1) Please address Ben's concerns about turning EEE on by default
>   given that standardization is not complete yet.
>
> 2) I hate module parameters, I'd rather you create a new ethtool
>   feature bit and thus allow the setting to be modified at run
>   time.  Please create a new ethtool control flag, and remove
>   this module option.
>
> Thanks.
> --

Thank you Dave.  I know that Bruce (and company) are actively looking
into your first point.  I would think that he would have either a
response or update on the status come Monday/Tuesday.

Regarding option 2, understood and I will work out with Bruce on when
and who will do the work.  I will be taking a look at it tomorrow
(later today), if I am not able to come up with a patch in the near
future.  I will return to working on it by Wednesday.

-- 
Cheers,
Jeff

^ permalink raw reply

* Re: [PATCH] net: optimize Berkeley Packet Filter (BPF) processing
From: Hagen Paul Pfeifer @ 2010-06-20  9:50 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, davem
In-Reply-To: <20100619221611.784f7dbc@nehalam>

* Stephen Hemminger | 2010-06-19 22:16:11 [-0700]:

>I don't think this works because it breaks ABI compatibility for applications that
>use older versions.

Are you sure Stephen? It is a one-to-one mapping of the ABI but maybe it was
too late yesterday ... ;-)


-- 
Hagen Paul Pfeifer <hagen@jauu.net>  ||  http://jauu.net/
Telephone: +49 174 5455209           ||  Key Id: 0x98350C22
Key Fingerprint: 490F 557B 6C48 6D7E 5706 2EA2 4A22 8D45 9835 0C22

^ permalink raw reply

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Michael S. Tsirkin @ 2010-06-20 10:06 UTC (permalink / raw)
  To: Xin, Xiaohui
  Cc: Herbert Xu, Stephen Hemminger, netdev@vger.kernel.org,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com, Rusty Russell
In-Reply-To: <F2E9EB7348B8264F86B6AB8151CE2D7915089FE573@shsmsx502.ccr.corp.intel.com>

On Fri, Jun 18, 2010 at 03:14:18PM +0800, Xin, Xiaohui wrote:
> >-----Original Message-----
> >From: Herbert Xu [mailto:herbert@gondor.apana.org.au]
> >Sent: Friday, June 18, 2010 1:59 PM
> >To: Xin, Xiaohui
> >Cc: Stephen Hemminger; netdev@vger.kernel.org; kvm@vger.kernel.org;
> >linux-kernel@vger.kernel.org; mst@redhat.com; mingo@elte.hu; davem@davemloft.net;
> >jdike@linux.intel.com; Rusty Russell
> >Subject: Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
> >
> >On Fri, Jun 18, 2010 at 01:26:49PM +0800, Xin, Xiaohui wrote:
> >>
> >> Herbert,
> >> I have questions about the idea above:
> >> 1) Since netdev_alloc_skb() is still there, and we only modify alloc_page(),
> >> then we don't need napi_gro_frags() any more, the driver's original receiving
> >> function is ok. Right?
> >
> >Well I was actually thinking about converting all drivers that
> >need this to napi_gro_frags.  But now that you mention it, yes
> >we could still keep the old interface to minimise the work.
> >
> >> 2) Is napi_gro_frags() only suitable for TCP protocol packet?
> >> I have done a small test for ixgbe driver to let it only allocate paged buffers
> >> and found kernel hangs when napi_gro_frags() receives an ARP packet.
> >
> >It should work with any packet.  In fact, I'm pretty sure the
> >other drivers (e.g., cxgb3) use that interface for all packets.
> >
> Thanks for the verification. By the way, does that mean that nearly all drivers can use the 
> same napi_gro_frags() to receive buffers though currently each driver has its own receiving 
> function?
> 
> >> 3) As I have mentioned above, with this idea, netdev_alloc_skb() will allocate
> >> as usual, the data pointed by skb->data will be copied into the first guest buffer.
> >> That means we should reserve sufficient room in guest buffer. For PS mode
> >> supported driver (for example ixgbe), the room will be more than 128. After 128bytes,
> >> we will put the first frag data. Look into virtio-net.c the function page_to_skb()
> >> and receive_mergeable(), that means we should modify guest virtio-net driver to
> >> compute the offset as the parameter for skb_set_frag().
> >>
> >> How do you think about this? Attached is a patch to how to modify the guest driver.
> >> I reserve 512 bytes as an example, and transfer the header len of the skb in hdr->hdr_len.
> >
> >Expanding the buffer size to 512 bytes to accommodate PS mode
> >looks reasonable to me.
> >
> >However, I don't think we should increase the copy threshold to
> >512 bytes at the same time.  I don't have any figures myself but
> >I think if we are to make such a change it should be a separate
> >one and come with supporting numbers.
> >
> Let me have a look to see if I can retain the copy threshold as 128 bytes 
> and copy the header data safely.

Changing the guest virtio to match the backend is a problem,
this breaks migration etc.


> >Cheers,
> >--
> >Visit Openswan at http://www.openswan.org/
> >Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
> >Home Page: http://gondor.apana.org.au/~herbert/
> >PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Herbert Xu @ 2010-06-20 10:32 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Xin, Xiaohui, Stephen Hemminger, netdev@vger.kernel.org,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com, Rusty Russell
In-Reply-To: <20100620100631.GB4578@redhat.com>

On Sun, Jun 20, 2010 at 01:06:32PM +0300, Michael S. Tsirkin wrote:
>
> Changing the guest virtio to match the backend is a problem,
> this breaks migration etc.

As long as it's done in a backwards compatible way it should be
fine.  It's just like migrating from a backend that supports TSO
to one that doesn't.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Michael S. Tsirkin @ 2010-06-20 10:39 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Xin, Xiaohui, Stephen Hemminger, netdev@vger.kernel.org,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com, Rusty Russell
In-Reply-To: <20100620103235.GA31284@gondor.apana.org.au>

On Sun, Jun 20, 2010 at 08:32:35PM +1000, Herbert Xu wrote:
> On Sun, Jun 20, 2010 at 01:06:32PM +0300, Michael S. Tsirkin wrote:
> >
> > Changing the guest virtio to match the backend is a problem,
> > this breaks migration etc.
> 
> As long as it's done in a backwards compatible way it should be
> fine.

Possibly, but to me the need to do this implies that
we'll need another change with different hardware at the backend.

> It's just like migrating from a backend that supports TSO
> to one that doesn't.
> 
> Cheers,

Exactly. We don't support such migration.

> -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH] Driver-core: Always create class directories fixing the broken network drivers.
From: Kay Sievers @ 2010-06-20 10:52 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Greg KH, Johannes Berg, netdev
In-Reply-To: <m139wiuswa.fsf_-_@fess.ebiederm.org>

On Sun, Jun 20, 2010 at 08:20, Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> In get_device_parent there is a check to not add a class directory
> when a class device was put under another class device.  The check was
> put in place as a just in case measure to not break old userspace if
> any existing code happened to depend on it.  Currently the only known
> way that we get a class device under a class device is due to the
> rearrangement of devices that happened when the new sysfs layout was
> introduced.
>
> With the introduction of tagged sysfs directories for properly
> handling network namespace support this omission in creating the class
> directories went from a bad thing in terms of namespace pollution, to
> actually breaking device_remove.
>
> Currently there are two reported network device drivers that break
> because the class directory was not created by the device layer.  The
> usb bnep driver and the mac80211_hwsim driver.
>
> Every solution proposed changes the sysfs layout for the affected
> devices, and thus has the potential to break userspace.
>
> Since we are changing the sysfs layout anyway, and since we are now
> talking about several devices all with the same problem, all caused by
> the same over conservative bit of code.  Let's kill that bit of code.
>
> There have been other proposals to fix this but they all have been
> more complicated, and none of them have actually resulted in working
> code.
>
> Any userspace that works with both the old and the new sysfs layouts
> should not be affected by this change, and even if someone depends
> on it we are talking a very small number of drivers overall that
> are affected.
>
> My apologies for not fully catching this hole in the logic
> when this code was originally added.

We can not do this. Simply comparing the sysfs tree before and after
shows that it breaks 'input'. inputX and mouseX are now separated by a
subdirectory, which is wrong.

As mentioned earlier, It's pretty fragile to change things in this
area, and I prefer the broken network driver-core interactions to be
fixed instead - even when they are more complicated.

Thanks,
Kay

^ permalink raw reply

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Herbert Xu @ 2010-06-20 11:02 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Xin, Xiaohui, Stephen Hemminger, netdev@vger.kernel.org,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com, Rusty Russell
In-Reply-To: <20100620103909.GA5285@redhat.com>

On Sun, Jun 20, 2010 at 01:39:09PM +0300, Michael S. Tsirkin wrote:
>
> > It's just like migrating from a backend that supports TSO
> > to one that doesn't.
> 
> Exactly. We don't support such migration.

Well that's something that has to be addressed in the virtio_net.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Michael S. Tsirkin @ 2010-06-20 11:11 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Xin, Xiaohui, Stephen Hemminger, netdev@vger.kernel.org,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com, Rusty Russell
In-Reply-To: <20100620110254.GA31484@gondor.apana.org.au>

On Sun, Jun 20, 2010 at 09:02:54PM +1000, Herbert Xu wrote:
> On Sun, Jun 20, 2010 at 01:39:09PM +0300, Michael S. Tsirkin wrote:
> >
> > > It's just like migrating from a backend that supports TSO
> > > to one that doesn't.
> > 
> > Exactly. We don't support such migration.
> 
> Well that's something that has to be addressed in the virtio_net.

Rather than modifying all guests, it seems much easier not to assume
specific buffer layout in host.  Copying network header around seems a
small cost.

> Cheers,
> -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH] Driver-core: Always create class directories fixing the broken network drivers.
From: Johannes Berg @ 2010-06-20 11:33 UTC (permalink / raw)
  To: Kay Sievers; +Cc: Eric W. Biederman, Greg KH, netdev
In-Reply-To: <AANLkTilVHF0dITpB3yJYaCnGPonhdl7NWnb-P12RAFTs@mail.gmail.com>

On Sun, 2010-06-20 at 12:52 +0200, Kay Sievers wrote:

> As mentioned earlier, It's pretty fragile to change things in this
> area, and I prefer the broken network driver-core interactions to be
> fixed instead - even when they are more complicated.

Can you _please_ offer a proper way to fix it then?

johannes

^ permalink raw reply

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Herbert Xu @ 2010-06-20 11:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Xin, Xiaohui, Stephen Hemminger, netdev@vger.kernel.org,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com, Rusty Russell
In-Reply-To: <20100620111124.GB5285@redhat.com>

On Sun, Jun 20, 2010 at 02:11:24PM +0300, Michael S. Tsirkin wrote:
>
> Rather than modifying all guests, it seems much easier not to assume
> specific buffer layout in host.  Copying network header around seems a
> small cost.

Well sure we can debate the specifics of this implementation detail.

However, the fact that virtio_net doesn't support feature renegotiation
on live migration is not a valid reason against this.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH] Driver-core: Always create class directories fixing the broken network drivers.
From: Kay Sievers @ 2010-06-20 11:46 UTC (permalink / raw)
  To: Johannes Berg; +Cc: Eric W. Biederman, Greg KH, netdev
In-Reply-To: <1277033628.3642.1.camel@jlt3.sipsolutions.net>

On Sun, Jun 20, 2010 at 13:33, Johannes Berg <johannes@sipsolutions.net> wrote:
> On Sun, 2010-06-20 at 12:52 +0200, Kay Sievers wrote:
>
>> As mentioned earlier, It's pretty fragile to change things in this
>> area, and I prefer the broken network driver-core interactions to be
>> fixed instead - even when they are more complicated.
>
> Can you _please_ offer a proper way to fix it then?

Sorry, I have no real experience with the issues created by the
assumption that network drivers need to be able to get unloaded while
in use. That's very special, always requires a
compiled-into-the-kernel part of the subsystem, and makes it hard to
work with, as we can not use any of the usual core infrastructure to
solve that.

The only real simple thing that works is splitting the module in two
modules, which isn't really something I would propose.

Maybe the wait-for in the module-exit like your recent mail suggests
works, but I did not try that. Otherwise we can solve this by changing
the net driver and by adding some needed stuff to the core to allow
in-core bus device cleanup.

The class device hierarchy should be removed for proper network
namespace support, it's nothing we properly support with the current
core code. We better don't fiddle around with stuff nobody really
knows what it breaks. Just like I ran into the 'input' stuff now,
which was a really simple case to find.

Kay

^ permalink raw reply

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Michael S. Tsirkin @ 2010-06-20 11:47 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Xin, Xiaohui, Stephen Hemminger, netdev@vger.kernel.org,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com, Rusty Russell
In-Reply-To: <20100620113609.GA31693@gondor.apana.org.au>

On Sun, Jun 20, 2010 at 09:36:09PM +1000, Herbert Xu wrote:
> On Sun, Jun 20, 2010 at 02:11:24PM +0300, Michael S. Tsirkin wrote:
> >
> > Rather than modifying all guests, it seems much easier not to assume
> > specific buffer layout in host.  Copying network header around seems a
> > small cost.
> 
> Well sure we can debate the specifics of this implementation detail.

Let's do this then.  So far the virtio spec avoided making layout
assumptions, leaving guests lay out data as they see fit.
Isn't it possible to keep supporting this with zero copy for hardware
that can issue DMA at arbitrary addresses?

> However, the fact that virtio_net doesn't support feature renegotiation
> on live migration is not a valid reason against this.
> 
> Cheers,

-- 
MST

^ permalink raw reply

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Herbert Xu @ 2010-06-20 11:59 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Xin, Xiaohui, Stephen Hemminger, netdev@vger.kernel.org,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com, Rusty Russell
In-Reply-To: <20100620114719.GC5285@redhat.com>

On Sun, Jun 20, 2010 at 02:47:19PM +0300, Michael S. Tsirkin wrote:
>
> Let's do this then.  So far the virtio spec avoided making layout
> assumptions, leaving guests lay out data as they see fit.
> Isn't it possible to keep supporting this with zero copy for hardware
> that can issue DMA at arbitrary addresses?

I think you're mistaken with respect to what is being proposed.
Raising 512 bytes isn't a hard constraint, it is merely an
optimisation for Intel NICs because their PS mode can produce
a head fragment of up to 512 bytes.

If the guest didn't allocate 512 bytes it wouldn't be the end of
the world, it'd just mean that we'd either copy whatever is in
the head fragment, or we waste 4096-X bytes of memory where X
is the number of bytes in the head.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH] Driver-core: Always create class directories fixing the  broken network drivers.
From: Eric W. Biederman @ 2010-06-20 12:29 UTC (permalink / raw)
  To: Kay Sievers; +Cc: Johannes Berg, Greg KH, netdev
In-Reply-To: <AANLkTikZAKZA9mfb7RjaqNFnX8gCM3A9PTfH350botL8@mail.gmail.com>

Kay Sievers <kay.sievers@vrfy.org> writes:

> On Sun, Jun 20, 2010 at 13:33, Johannes Berg <johannes@sipsolutions.net> wrote:
>> On Sun, 2010-06-20 at 12:52 +0200, Kay Sievers wrote:
>>
>>> As mentioned earlier, It's pretty fragile to change things in this
>>> area, and I prefer the broken network driver-core interactions to be
>>> fixed instead - even when they are more complicated.
>>
>> Can you _please_ offer a proper way to fix it then?
>
> Sorry, I have no real experience with the issues created by the
> assumption that network driver need to be able to get unloaded while
> in use. That's very special, always requires a
> compiled-into-the-kernel part of the subsystem, and makes it hard to
> work with, as we can not use any of the usual core infrastructure to
> solve that.

So please look at https://bugzilla.kernel.org/show_bug.cgi?id=16215

That simply creates and destroys the network device as things come
and go.

I think the bnep case is much more serious because it is real hardware
not a testing simulation, and it is the second instance of this.

Calling the change broken when I can boot up and run X in that
configuration just fine is a vast overstatement.  Especially
when you don't acknowledge that the device layer is broken.

I will agree that insane amounts of backwards compatibility are a good
idea.  So I will cook up a version of my patch that adds a hack to the
device layer to only apply this change to devices of class net.

That should let us postpone the architectural dreams for another
day.

Eric

^ permalink raw reply

* [PATCH] Driver-core: Always create network class directories in get_device_parent.
From: Eric W. Biederman @ 2010-06-20 12:46 UTC (permalink / raw)
  To: Greg KH; +Cc: Johannes Berg, netdev, Kay Sievers
In-Reply-To: <AANLkTikZAKZA9mfb7RjaqNFnX8gCM3A9PTfH350botL8@mail.gmail.com>

In get_device_parent a check was added to not add a class
directory when a class device was put under another class device.  The
check was put in place as a just in case measure to not break old
userspace if any existing code happened to depend on it.  Devices in
the input subsystem are affected by this code path so there is a
reasonable chance that some piece of user space will break if we just
remove this kludge.

At the same time there are at least two network drivers that have
potential unnecessary namespace conflicts because class directories
have not been created for their network devices.

With the introduction of tagged sysfs directories for properly
handling network namespace support this omission in creating the class
directories went from a bad thing in terms of namespace pollution, to
actually breaking device_remove.

Currently there are two reported network device drivers that break
because the class directory was not created by the device layer.  The
usb bnep driver and the mac80211_hwsim driver.

Every solution proposed changes the sysfs layout for the affected
devices, and thus has the potential to break userspace.

Since we are changing the sysfs layout anyway, and since we are now
talking about several devices all with the same problem, all caused by
the same overly conservative bit of code.  Let's fix the device layer
for network devices.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 drivers/base/core.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 9630fbd..ffb8443 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -673,7 +673,7 @@ static struct kobject *get_device_parent(struct device *dev,
 		 */
 		if (parent == NULL)
 			parent_kobj = virtual_device_parent(dev);
-		else if (parent->class)
+		else if (parent->class && (strcmp(dev->class->name, "net") != 0))
 			return &parent->kobj;
 		else
 			parent_kobj = &parent->kobj;
-- 
1.6.5.2.143.g8cc62

^ permalink raw reply related

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Michael S. Tsirkin @ 2010-06-20 12:48 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Xin, Xiaohui, Stephen Hemminger, netdev@vger.kernel.org,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com, Rusty Russell
In-Reply-To: <20100620115926.GA31849@gondor.apana.org.au>

On Sun, Jun 20, 2010 at 09:59:26PM +1000, Herbert Xu wrote:
> On Sun, Jun 20, 2010 at 02:47:19PM +0300, Michael S. Tsirkin wrote:
> >
> > Let's do this then.  So far the virtio spec avoided making layout
> > assumptions, leaving guests lay out data as they see fit.
> > Isn't it possible to keep supporting this with zero copy for hardware
> > that can issue DMA at arbitrary addresses?
> 
> I think you're mistaken with respect to what is being proposed.
> Raising 512 bytes isn't a hard constraint, it is merely an
> optimisation for Intel NICs because their PS mode can produce
> a head fragment of up to 512 bytes.
> 
Thanks for the clarification. So what is discussed here is
the API changes that will enable this optimization?
Of course, it makes sense to consider this to try and avoid code churn
in the future.

As a side note, I hope to see a basic zero copy implementation with
GSO/GRO that beats copy in host convincingly before work is started on
further optimizations, though.

> If the guest didn't allocate 512 bytes it wouldn't be the end of
> the world, it'd just mean that we'd either copy whatever is in
> the head fragment,
I don't know how much copying the head will cost.

> or we waste 4096-X bytes of memory where X
> is the number of bytes in the head.

This seems mostly harmless - and guest can always do a copy internally
to save memory, correct?
Note also that we lock a full page to allow DMA, anyway.

> Cheers,
> -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH] Driver-core: Always create class directories fixing the broken network drivers.
From: Kay Sievers @ 2010-06-20 13:37 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Johannes Berg, Greg KH, netdev
In-Reply-To: <m1hbkx6g6p.fsf@fess.ebiederm.org>

On Sun, Jun 20, 2010 at 14:29, Eric W. Biederman <ebiederm@xmission.com> wrote:
> Kay Sievers <kay.sievers@vrfy.org> writes:
>> On Sun, Jun 20, 2010 at 13:33, Johannes Berg <johannes@sipsolutions.net> wrote:
>>> On Sun, 2010-06-20 at 12:52 +0200, Kay Sievers wrote:
>>>
>>>> As mentioned earlier, It's pretty fragile to change things in this
>>>> area, and I prefer the broken network driver-core interactions to be
>>>> fixed instead - even when they are more complicated.
>>>
>>> Can you _please_ offer a proper way to fix it then?
>>
>> Sorry, I have no real experience with the issues created by the
>> assumption that network driver need to be able to get unloaded while
>> in use. That's very special, always requires a
>> compiled-into-the-kernel part of the subsystem, and makes it hard to
>> work with, as we can not use any of the usual core infrastructure to
>> solve that.
>
> So please look at https://bugzilla.kernel.org/show_bug.cgi?id=16215
>
> That simply creates and destroys the network device as things come
> and go.

I'm still not sure, any help here would be appreciated.

> I think the bnep case is much more serious because it is real hardware
> not a testing simulation, and it is the second instance of this.
>
> Calling the change broken when I can boot up and run X in that
> configuration just fine is a vast overstatement.

Oh, I seriously would love this rule - it would make my work so much
easier. But I need to make it totally clear: "Adding intermediate
directories into 'input' sysfs is absolutely broken, regardless of
whether your box comes up or not." :)

X is using udev, and udev aggressively hides these details and forbids
matching such details, but many other tools which read sysfs directly,
including ones using the conceptually broken 'device' symlink will for
sure break with such changes.

> Especially
> when you don't acknowledge that the device layer is broken.

Stacking devices from different classes is broken, and not a direct
problem of the core. It is just not supported. The core might just
need to refuse that in the first place, but that's a different issue.

> I will agree that insane amounts of backwards compatibility are a good
> idea.  So I will cook up a version of my patch that adds a hack to the
> device layer to only apply this change to devices of class net.
>
> That should let us postpone the architectural dreams for another
> day.

It's not a dream, it needs to be fixed where it is used. We can not
allow to stack classes.

Kay

^ permalink raw reply

* Re: Distributed Switch Architecture(DSA)
From: Joakim Tjernlund @ 2010-06-20 14:41 UTC (permalink / raw)
  To: Lennert Buytenhek; +Cc: netdev
In-Reply-To: <20100619185739.GQ14513@mail.wantstofly.org>

Lennert Buytenhek <buytenh@wantstofly.org> wrote on 2010/06/19 20:57:39:
>
> On Sat, Jun 19, 2010 at 08:48:31PM +0200, Joakim Tjernlund wrote:
>
> > > > > > Not sure how one would express whether VLAN tags should be stripped
> > > > > > off or not when egressing the HW switch's physical port.
> > > > >
> > > > > If you transmit a packet onto 'lan', it will be sent to the switch chip
> > > > > with an "untagged" DSA tag.  If you transmit a packet onto 'lan.123',
> > > > > it will be sent to the switch chip with a "tagged" DSA tag.  See
> > > > > net/dsa/tag_dsa.c for details.
> > > >
> > > > Ah, now I get it, thanks.
> > > > However, how does this work for LAN to LAN pkgs? LAN1 and LAN2 could be
> > > > in the same VLAN but one is implicit(port) VLAN and the
> > > > other is explicit.
> > >
> > > If you tell the HW switch to forward these packets, they will never
> > > appear at the CPU interface, so the DSA tagging/untagging doesn't enter
> > > the picture.
> >
> > "tell the HW switch"? Doesn't DSA do that already?
>
> Not in its current iteration, as I've explained in previous emails.

Sorry, I didn't quite get that.

>
>
> > If not, what is the point of DSA then if it doesn't use the native
> > forwarding capabilities of the HW switch?
>
> The point is and always was to provide a framework for proper integration
> of hardware switch chips into the Linux kernel.  This framework doesn't
> become useless just because it doesn't already support every single
> hardware feature at this point.

Right, sorry if I sounded a bit harsh.

So DSA currently does a very minimal config of the HW switch to get
things going.
If you want to do something more fancy one has to
add a control plane to DSA which would possibly talk
to a user space app. Is that correct?

>
>
> > > > How do I config the HW switch to do that?
> > >
> > > Tell the switch that the vlan is native on one of the ports but not on
> > > the other.  It's been a while since I looked at the chip docs but there
> > > are ways of doing this.
> >
> > The current DSA impl. does not support this? There should be some
> > way to manage this within the DSA framework.
>
> Have you even tried the DSA code?

Not yet and I don't have any MV HW either :(


^ permalink raw reply

* Re: [RFC PATCH v7 01/19] Add a new structure for skb buffer from external.
From: Ben Hutchings @ 2010-06-20 15:19 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Michael S. Tsirkin, Xin, Xiaohui, Stephen Hemminger,
	netdev@vger.kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, mingo@elte.hu, davem@davemloft.net,
	jdike@linux.intel.com, Rusty Russell
In-Reply-To: <20100620115926.GA31849@gondor.apana.org.au>

On Sun, 2010-06-20 at 21:59 +1000, Herbert Xu wrote:
> On Sun, Jun 20, 2010 at 02:47:19PM +0300, Michael S. Tsirkin wrote:
> >
> > Let's do this then.  So far the virtio spec avoided making layout
> > assumptions, leaving guests lay out data as they see fit.
> > Isn't it possible to keep supporting this with zero copy for hardware
> > that can issue DMA at arbitrary addresses?
> 
> I think you're mistaken with respect to what is being proposed.
> Raising 512 bytes isn't a hard constraint, it is merely an
> optimisation for Intel NICs because their PS mode can produce
> a head fragment of up to 512 bytes.
> 
> If the guest didn't allocate 512 bytes it wouldn't be the end of
> the world, it'd just mean that we'd either copy whatever is in
> the head fragment, or we waste 4096-X bytes of memory where X
> is the number of bytes in the head.

If I understand correctly what this 'PS mode' is (I haven't seen the
documentation for it), it is a feature that Microsoft requested from
hardware vendors for use in Hyper-V.  As a result, the SFC9000 family
and presumably other controllers also implement something similar.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox