Netdev List
 help / color / mirror / Atom feed
* [PATCH] USB: remove dbg() usage in USB networking drivers
From: Greg Kroah-Hartman @ 2012-09-19 17:13 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-usb-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

From: Greg Kroah-Hartman <gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org>

The dbg() USB macro is so old, it predates me.  The USB networking drivers are
the last holdouts using this macro, and we want to get rid of it, so replace
the usage of it with the proper netdev_dbg() or dev_dbg() (depending on the
context) calls.

Signed-off-by: Greg Kroah-Hartman <gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org>
---
 drivers/net/usb/asix_devices.c |   36 ++++++-----
 drivers/net/usb/catc.c         |   55 +++++++++-------
 drivers/net/usb/gl620a.c       |   10 ++-
 drivers/net/usb/kaweth.c       |  134 ++++++++++++++++++++---------------------
 drivers/net/usb/net1080.c      |   46 +++++++-------
 drivers/net/usb/rtl8150.c      |    6 -
 6 files changed, 152 insertions(+), 135 deletions(-)

Dave, if I can take this through my usb-next tree, I can finally get rid of the
dbg() macro for good.  Or you can take it, and I can wait to drop dbg() until
after 3.7-rc1, which is also fine with me; whichever is easiest for you.

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 4fd48df..8d5fdf1 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -221,7 +221,8 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
 	/* Get the MAC address */
 	ret = asix_read_cmd(dev, AX88172_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf);
 	if (ret < 0) {
-		dbg("read AX_CMD_READ_NODE_ID failed: %d", ret);
+		netdev_dbg(dev->net, "read AX_CMD_READ_NODE_ID failed: %d\n",
+			   ret);
 		goto out;
 	}
 	memcpy(dev->net->dev_addr, buf, ETH_ALEN);
@@ -303,7 +304,7 @@ static int ax88772_reset(struct usbnet *dev)
 
 	ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL);
 	if (ret < 0) {
-		dbg("Select PHY #1 failed: %d", ret);
+		netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
 		goto out;
 	}
 
@@ -331,13 +332,13 @@ static int ax88772_reset(struct usbnet *dev)
 
 	msleep(150);
 	rx_ctl = asix_read_rx_ctl(dev);
-	dbg("RX_CTL is 0x%04x after software reset", rx_ctl);
+	netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl);
 	ret = asix_write_rx_ctl(dev, 0x0000);
 	if (ret < 0)
 		goto out;
 
 	rx_ctl = asix_read_rx_ctl(dev);
-	dbg("RX_CTL is 0x%04x setting to 0x0000", rx_ctl);
+	netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl);
 
 	ret = asix_sw_reset(dev, AX_SWRESET_PRL);
 	if (ret < 0)
@@ -364,7 +365,7 @@ static int ax88772_reset(struct usbnet *dev)
 				AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT,
 				AX88772_IPG2_DEFAULT, 0, NULL);
 	if (ret < 0) {
-		dbg("Write IPG,IPG1,IPG2 failed: %d", ret);
+		netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret);
 		goto out;
 	}
 
@@ -381,10 +382,13 @@ static int ax88772_reset(struct usbnet *dev)
 		goto out;
 
 	rx_ctl = asix_read_rx_ctl(dev);
-	dbg("RX_CTL is 0x%04x after all initializations", rx_ctl);
+	netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n",
+		   rx_ctl);
 
 	rx_ctl = asix_read_medium_status(dev);
-	dbg("Medium Status is 0x%04x after all initializations", rx_ctl);
+	netdev_dbg(dev->net,
+		   "Medium Status is 0x%04x after all initializations\n",
+		   rx_ctl);
 
 	return 0;
 
@@ -416,7 +420,7 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 	/* Get the MAC address */
 	ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf);
 	if (ret < 0) {
-		dbg("Failed to read MAC address: %d", ret);
+		netdev_dbg(dev->net, "Failed to read MAC address: %d\n", ret);
 		return ret;
 	}
 	memcpy(dev->net->dev_addr, buf, ETH_ALEN);
@@ -439,7 +443,7 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 	/* Reset the PHY to normal operation mode */
 	ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL);
 	if (ret < 0) {
-		dbg("Select PHY #1 failed: %d", ret);
+		netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
 		return ret;
 	}
 
@@ -459,7 +463,7 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 
 	/* Read PHYID register *AFTER* the PHY was reset properly */
 	phyid = asix_get_phyid(dev);
-	dbg("PHYID=0x%08x", phyid);
+	netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid);
 
 	/* Asix framing packs multiple eth frames into a 2K usb bulk transfer */
 	if (dev->driver_info->flags & FLAG_FRAMING_AX) {
@@ -575,13 +579,13 @@ static int ax88178_reset(struct usbnet *dev)
 	u32 phyid;
 
 	asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status);
-	dbg("GPIO Status: 0x%04x", status);
+	netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status);
 
 	asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL);
 	asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom);
 	asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL);
 
-	dbg("EEPROM index 0x17 is 0x%04x", eeprom);
+	netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom);
 
 	if (eeprom == cpu_to_le16(0xffff)) {
 		data->phymode = PHY_MODE_MARVELL;
@@ -592,7 +596,7 @@ static int ax88178_reset(struct usbnet *dev)
 		data->ledmode = le16_to_cpu(eeprom) >> 8;
 		gpio0 = (le16_to_cpu(eeprom) & 0x80) ? 0 : 1;
 	}
-	dbg("GPIO0: %d, PhyMode: %d", gpio0, data->phymode);
+	netdev_dbg(dev->net, "GPIO0: %d, PhyMode: %d\n", gpio0, data->phymode);
 
 	/* Power up external GigaPHY through AX88178 GPIO pin */
 	asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_1 | AX_GPIO_GPO1EN, 40);
@@ -601,14 +605,14 @@ static int ax88178_reset(struct usbnet *dev)
 		asix_write_gpio(dev, 0x001c, 300);
 		asix_write_gpio(dev, 0x003c, 30);
 	} else {
-		dbg("gpio phymode == 1 path");
+		netdev_dbg(dev->net, "gpio phymode == 1 path\n");
 		asix_write_gpio(dev, AX_GPIO_GPO1EN, 30);
 		asix_write_gpio(dev, AX_GPIO_GPO1EN | AX_GPIO_GPO_1, 30);
 	}
 
 	/* Read PHYID register *AFTER* powering up PHY */
 	phyid = asix_get_phyid(dev);
-	dbg("PHYID=0x%08x", phyid);
+	netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid);
 
 	/* Set AX88178 to enable MII/GMII/RGMII interface for external PHY */
 	asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 0, 0, 0, NULL);
@@ -770,7 +774,7 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf)
 	/* Get the MAC address */
 	ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf);
 	if (ret < 0) {
-		dbg("Failed to read MAC address: %d", ret);
+		netdev_dbg(dev->net, "Failed to read MAC address: %d\n", ret);
 		return ret;
 	}
 	memcpy(dev->net->dev_addr, buf, ETH_ALEN);
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 26c5beb..6bf5672 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -236,7 +236,8 @@ static void catc_rx_done(struct urb *urb)
 	}
 
 	if (status) {
-		dbg("rx_done, status %d, length %d", status, urb->actual_length);
+		dev_dbg(&urb->dev->dev, "rx_done, status %d, length %d\n",
+			status, urb->actual_length);
 		return;
 	}
 
@@ -275,10 +276,11 @@ static void catc_rx_done(struct urb *urb)
 		if (atomic_read(&catc->recq_sz)) {
 			int state;
 			atomic_dec(&catc->recq_sz);
-			dbg("getting extra packet");
+			netdev_dbg(catc->netdev, "getting extra packet\n");
 			urb->dev = catc->usbdev;
 			if ((state = usb_submit_urb(urb, GFP_ATOMIC)) < 0) {
-				dbg("submit(rx_urb) status %d", state);
+				netdev_dbg(catc->netdev,
+					   "submit(rx_urb) status %d\n", state);
 			}
 		} else {
 			clear_bit(RX_RUNNING, &catc->flags);
@@ -317,18 +319,20 @@ static void catc_irq_done(struct urb *urb)
 		return;
 	/* -EPIPE:  should clear the halt */
 	default:		/* error */
-		dbg("irq_done, status %d, data %02x %02x.", status, data[0], data[1]);
+		dev_dbg(&urb->dev->dev,
+			"irq_done, status %d, data %02x %02x.\n",
+			status, data[0], data[1]);
 		goto resubmit;
 	}
 
 	if (linksts == LinkGood) {
 		netif_carrier_on(catc->netdev);
-		dbg("link ok");
+		netdev_dbg(catc->netdev, "link ok\n");
 	}
 
 	if (linksts == LinkBad) {
 		netif_carrier_off(catc->netdev);
-		dbg("link bad");
+		netdev_dbg(catc->netdev, "link bad\n");
 	}
 
 	if (hasdata) {
@@ -385,7 +389,7 @@ static void catc_tx_done(struct urb *urb)
 	int r, status = urb->status;
 
 	if (status == -ECONNRESET) {
-		dbg("Tx Reset.");
+		dev_dbg(&urb->dev->dev, "Tx Reset.\n");
 		urb->status = 0;
 		catc->netdev->trans_start = jiffies;
 		catc->netdev->stats.tx_errors++;
@@ -395,7 +399,8 @@ static void catc_tx_done(struct urb *urb)
 	}
 
 	if (status) {
-		dbg("tx_done, status %d, length %d", status, urb->actual_length);
+		dev_dbg(&urb->dev->dev, "tx_done, status %d, length %d\n",
+			status, urb->actual_length);
 		return;
 	}
 
@@ -511,7 +516,8 @@ static void catc_ctrl_done(struct urb *urb)
 	int status = urb->status;
 
 	if (status)
-		dbg("ctrl_done, status %d, len %d.", status, urb->actual_length);
+		dev_dbg(&urb->dev->dev, "ctrl_done, status %d, len %d.\n",
+			status, urb->actual_length);
 
 	spin_lock_irqsave(&catc->ctrl_lock, flags);
 
@@ -667,7 +673,9 @@ static void catc_set_multicast_list(struct net_device *netdev)
 		f5u011_mchash_async(catc, catc->multicast);
 		if (catc->rxmode[0] != rx) {
 			catc->rxmode[0] = rx;
-			dbg("Setting RX mode to %2.2X %2.2X", catc->rxmode[0], catc->rxmode[1]);
+			netdev_dbg(catc->netdev,
+				   "Setting RX mode to %2.2X %2.2X\n",
+				   catc->rxmode[0], catc->rxmode[1]);
 			f5u011_rxmode_async(catc, catc->rxmode);
 		}
 	}
@@ -766,6 +774,7 @@ static const struct net_device_ops catc_netdev_ops = {
 
 static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id)
 {
+	struct device *dev = &intf->dev;
 	struct usb_device *usbdev = interface_to_usbdev(intf);
 	struct net_device *netdev;
 	struct catc *catc;
@@ -774,7 +783,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 
 	if (usb_set_interface(usbdev,
 			intf->altsetting->desc.bInterfaceNumber, 1)) {
-                dev_err(&intf->dev, "Can't set altsetting 1.\n");
+		dev_err(dev, "Can't set altsetting 1.\n");
 		return -EIO;
 	}
 
@@ -817,7 +826,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 	if (le16_to_cpu(usbdev->descriptor.idVendor) == 0x0423 && 
 	    le16_to_cpu(usbdev->descriptor.idProduct) == 0xa &&
 	    le16_to_cpu(catc->usbdev->descriptor.bcdDevice) == 0x0130) {
-		dbg("Testing for f5u011");
+		dev_dbg(dev, "Testing for f5u011\n");
 		catc->is_f5u011 = 1;		
 		atomic_set(&catc->recq_sz, 0);
 		pktsz = RX_PKT_SZ;
@@ -838,7 +847,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
                 catc->irq_buf, 2, catc_irq_done, catc, 1);
 
 	if (!catc->is_f5u011) {
-		dbg("Checking memory size\n");
+		dev_dbg(dev, "Checking memory size\n");
 
 		i = 0x12345678;
 		catc_write_mem(catc, 0x7a80, &i, 4);
@@ -850,7 +859,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 		case 0x12345678:
 			catc_set_reg(catc, TxBufCount, 8);
 			catc_set_reg(catc, RxBufCount, 32);
-			dbg("64k Memory\n");
+			dev_dbg(dev, "64k Memory\n");
 			break;
 		default:
 			dev_warn(&intf->dev,
@@ -858,49 +867,49 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 		case 0x87654321:
 			catc_set_reg(catc, TxBufCount, 4);
 			catc_set_reg(catc, RxBufCount, 16);
-			dbg("32k Memory\n");
+			dev_dbg(dev, "32k Memory\n");
 			break;
 		}
 	  
-		dbg("Getting MAC from SEEROM.");
+		dev_dbg(dev, "Getting MAC from SEEROM.\n");
 	  
 		catc_get_mac(catc, netdev->dev_addr);
 		
-		dbg("Setting MAC into registers.");
+		dev_dbg(dev, "Setting MAC into registers.\n");
 	  
 		for (i = 0; i < 6; i++)
 			catc_set_reg(catc, StationAddr0 - i, netdev->dev_addr[i]);
 		
-		dbg("Filling the multicast list.");
+		dev_dbg(dev, "Filling the multicast list.\n");
 	  
 		memset(broadcast, 0xff, 6);
 		catc_multicast(broadcast, catc->multicast);
 		catc_multicast(netdev->dev_addr, catc->multicast);
 		catc_write_mem(catc, 0xfa80, catc->multicast, 64);
 		
-		dbg("Clearing error counters.");
+		dev_dbg(dev, "Clearing error counters.\n");
 		
 		for (i = 0; i < 8; i++)
 			catc_set_reg(catc, EthStats + i, 0);
 		catc->last_stats = jiffies;
 		
-		dbg("Enabling.");
+		dev_dbg(dev, "Enabling.\n");
 		
 		catc_set_reg(catc, MaxBurst, RX_MAX_BURST);
 		catc_set_reg(catc, OpModes, OpTxMerge | OpRxMerge | OpLenInclude | Op3MemWaits);
 		catc_set_reg(catc, LEDCtrl, LEDLink);
 		catc_set_reg(catc, RxUnit, RxEnable | RxPolarity | RxMultiCast);
 	} else {
-		dbg("Performing reset\n");
+		dev_dbg(dev, "Performing reset\n");
 		catc_reset(catc);
 		catc_get_mac(catc, netdev->dev_addr);
 		
-		dbg("Setting RX Mode");
+		dev_dbg(dev, "Setting RX Mode\n");
 		catc->rxmode[0] = RxEnable | RxPolarity | RxMultiCast;
 		catc->rxmode[1] = 0;
 		f5u011_rxmode(catc, catc->rxmode);
 	}
-	dbg("Init done.");
+	dev_dbg(dev, "Init done.\n");
 	printk(KERN_INFO "%s: %s USB Ethernet at usb-%s-%s, %pM.\n",
 	       netdev->name, (catc->is_f5u011) ? "Belkin F5U011" : "CATC EL1210A NetMate",
 	       usbdev->bus->bus_name, usbdev->devpath, netdev->dev_addr);
diff --git a/drivers/net/usb/gl620a.c b/drivers/net/usb/gl620a.c
index db3c802..a7e3f4e 100644
--- a/drivers/net/usb/gl620a.c
+++ b/drivers/net/usb/gl620a.c
@@ -91,7 +91,9 @@ static int genelink_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	// get the packet count of the received skb
 	count = le32_to_cpu(header->packet_count);
 	if (count > GL_MAX_TRANSMIT_PACKETS) {
-		dbg("genelink: invalid received packet count %u", count);
+		netdev_dbg(dev->net,
+			   "genelink: invalid received packet count %u\n",
+			   count);
 		return 0;
 	}
 
@@ -107,7 +109,8 @@ static int genelink_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
 		// this may be a broken packet
 		if (size > GL_MAX_PACKET_LEN) {
-			dbg("genelink: invalid rx length %d", size);
+			netdev_dbg(dev->net, "genelink: invalid rx length %d\n",
+				   size);
 			return 0;
 		}
 
@@ -133,7 +136,8 @@ static int genelink_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	skb_pull(skb, 4);
 
 	if (skb->len > GL_MAX_PACKET_LEN) {
-		dbg("genelink: invalid rx length %d", skb->len);
+		netdev_dbg(dev->net, "genelink: invalid rx length %d\n",
+			   skb->len);
 		return 0;
 	}
 	return 1;
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index c3d0349..c75e11e 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -267,19 +267,16 @@ static int kaweth_control(struct kaweth_device *kaweth,
 	struct usb_ctrlrequest *dr;
 	int retval;
 
-	dbg("kaweth_control()");
+	netdev_dbg(kaweth->net, "kaweth_control()\n");
 
 	if(in_interrupt()) {
-		dbg("in_interrupt()");
+		netdev_dbg(kaweth->net, "in_interrupt()\n");
 		return -EBUSY;
 	}
 
 	dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC);
-
-	if (!dr) {
-		dbg("kmalloc() failed");
+	if (!dr)
 		return -ENOMEM;
-	}
 
 	dr->bRequestType = requesttype;
 	dr->bRequest = request;
@@ -305,7 +302,7 @@ static int kaweth_read_configuration(struct kaweth_device *kaweth)
 {
 	int retval;
 
-	dbg("Reading kaweth configuration");
+	netdev_dbg(kaweth->net, "Reading kaweth configuration\n");
 
 	retval = kaweth_control(kaweth,
 				usb_rcvctrlpipe(kaweth->dev, 0),
@@ -327,7 +324,7 @@ static int kaweth_set_urb_size(struct kaweth_device *kaweth, __u16 urb_size)
 {
 	int retval;
 
-	dbg("Setting URB size to %d", (unsigned)urb_size);
+	netdev_dbg(kaweth->net, "Setting URB size to %d\n", (unsigned)urb_size);
 
 	retval = kaweth_control(kaweth,
 				usb_sndctrlpipe(kaweth->dev, 0),
@@ -349,7 +346,7 @@ static int kaweth_set_sofs_wait(struct kaweth_device *kaweth, __u16 sofs_wait)
 {
 	int retval;
 
-	dbg("Set SOFS wait to %d", (unsigned)sofs_wait);
+	netdev_dbg(kaweth->net, "Set SOFS wait to %d\n", (unsigned)sofs_wait);
 
 	retval = kaweth_control(kaweth,
 				usb_sndctrlpipe(kaweth->dev, 0),
@@ -372,7 +369,8 @@ static int kaweth_set_receive_filter(struct kaweth_device *kaweth,
 {
 	int retval;
 
-	dbg("Set receive filter to %d", (unsigned)receive_filter);
+	netdev_dbg(kaweth->net, "Set receive filter to %d\n",
+		   (unsigned)receive_filter);
 
 	retval = kaweth_control(kaweth,
 				usb_sndctrlpipe(kaweth->dev, 0),
@@ -421,12 +419,13 @@ static int kaweth_download_firmware(struct kaweth_device *kaweth,
 	kaweth->firmware_buf[4] = type;
 	kaweth->firmware_buf[5] = interrupt;
 
-	dbg("High: %i, Low:%i", kaweth->firmware_buf[3],
+	netdev_dbg(kaweth->net, "High: %i, Low:%i\n", kaweth->firmware_buf[3],
 		   kaweth->firmware_buf[2]);
 
-	dbg("Downloading firmware at %p to kaweth device at %p",
-	    fw->data, kaweth);
-	dbg("Firmware length: %d", data_len);
+	netdev_dbg(kaweth->net,
+		   "Downloading firmware at %p to kaweth device at %p\n",
+		   fw->data, kaweth);
+	netdev_dbg(kaweth->net, "Firmware length: %d\n", data_len);
 
 	return kaweth_control(kaweth,
 		              usb_sndctrlpipe(kaweth->dev, 0),
@@ -454,7 +453,7 @@ static int kaweth_trigger_firmware(struct kaweth_device *kaweth,
 	kaweth->firmware_buf[6] = 0x00;
 	kaweth->firmware_buf[7] = 0x00;
 
-	dbg("Triggering firmware");
+	netdev_dbg(kaweth->net, "Triggering firmware\n");
 
 	return kaweth_control(kaweth,
 			      usb_sndctrlpipe(kaweth->dev, 0),
@@ -474,11 +473,11 @@ static int kaweth_reset(struct kaweth_device *kaweth)
 {
 	int result;
 
-	dbg("kaweth_reset(%p)", kaweth);
+	netdev_dbg(kaweth->net, "kaweth_reset(%p)\n", kaweth);
 	result = usb_reset_configuration(kaweth->dev);
 	mdelay(10);
 
-	dbg("kaweth_reset() returns %d.",result);
+	netdev_dbg(kaweth->net, "kaweth_reset() returns %d.\n", result);
 
 	return result;
 }
@@ -595,6 +594,7 @@ static void kaweth_async_set_rx_mode(struct kaweth_device *kaweth);
  ****************************************************************/
 static void kaweth_usb_receive(struct urb *urb)
 {
+	struct device *dev = &urb->dev->dev;
 	struct kaweth_device *kaweth = urb->context;
 	struct net_device *net = kaweth->net;
 	int status = urb->status;
@@ -610,25 +610,25 @@ static void kaweth_usb_receive(struct urb *urb)
 		kaweth->stats.rx_errors++;
 		kaweth->end = 1;
 		wake_up(&kaweth->term_wait);
-		dbg("Status was -EPIPE.");
+		dev_dbg(dev, "Status was -EPIPE.\n");
 		return;
 	}
 	if (unlikely(status == -ECONNRESET || status == -ESHUTDOWN)) {
 		/* we are killed - set a flag and wake the disconnect handler */
 		kaweth->end = 1;
 		wake_up(&kaweth->term_wait);
-		dbg("Status was -ECONNRESET or -ESHUTDOWN.");
+		dev_dbg(dev, "Status was -ECONNRESET or -ESHUTDOWN.\n");
 		return;
 	}
 	if (unlikely(status == -EPROTO || status == -ETIME ||
 		     status == -EILSEQ)) {
 		kaweth->stats.rx_errors++;
-		dbg("Status was -EPROTO, -ETIME, or -EILSEQ.");
+		dev_dbg(dev, "Status was -EPROTO, -ETIME, or -EILSEQ.\n");
 		return;
 	}
 	if (unlikely(status == -EOVERFLOW)) {
 		kaweth->stats.rx_errors++;
-		dbg("Status was -EOVERFLOW.");
+		dev_dbg(dev, "Status was -EOVERFLOW.\n");
 	}
 	spin_lock(&kaweth->device_lock);
 	if (IS_BLOCKED(kaweth->status)) {
@@ -687,7 +687,7 @@ static int kaweth_open(struct net_device *net)
 	struct kaweth_device *kaweth = netdev_priv(net);
 	int res;
 
-	dbg("Opening network device.");
+	netdev_dbg(kaweth->net, "Opening network device.\n");
 
 	res = usb_autopm_get_interface(kaweth->intf);
 	if (res) {
@@ -787,7 +787,8 @@ static void kaweth_usb_transmit_complete(struct urb *urb)
 
 	if (unlikely(status != 0))
 		if (status != -ENOENT)
-			dbg("%s: TX status %d.", kaweth->net->name, status);
+			dev_dbg(&urb->dev->dev, "%s: TX status %d.\n",
+				kaweth->net->name, status);
 
 	netif_wake_queue(kaweth->net);
 	dev_kfree_skb_irq(skb);
@@ -871,7 +872,7 @@ static void kaweth_set_rx_mode(struct net_device *net)
                                      KAWETH_PACKET_FILTER_BROADCAST |
 		                     KAWETH_PACKET_FILTER_MULTICAST;
 
-	dbg("Setting Rx mode to %d", packet_filter_bitmap);
+	netdev_dbg(net, "Setting Rx mode to %d\n", packet_filter_bitmap);
 
 	netif_stop_queue(net);
 
@@ -916,7 +917,8 @@ static void kaweth_async_set_rx_mode(struct kaweth_device *kaweth)
 			result);
 	}
 	else {
-		dbg("Set Rx mode to %d", packet_filter_bitmap);
+		netdev_dbg(kaweth->net, "Set Rx mode to %d\n",
+			   packet_filter_bitmap);
 	}
 }
 
@@ -951,7 +953,7 @@ static int kaweth_suspend(struct usb_interface *intf, pm_message_t message)
 	struct kaweth_device *kaweth = usb_get_intfdata(intf);
 	unsigned long flags;
 
-	dbg("Suspending device");
+	dev_dbg(&intf->dev, "Suspending device\n");
 	spin_lock_irqsave(&kaweth->device_lock, flags);
 	kaweth->status |= KAWETH_STATUS_SUSPENDING;
 	spin_unlock_irqrestore(&kaweth->device_lock, flags);
@@ -968,7 +970,7 @@ static int kaweth_resume(struct usb_interface *intf)
 	struct kaweth_device *kaweth = usb_get_intfdata(intf);
 	unsigned long flags;
 
-	dbg("Resuming device");
+	dev_dbg(&intf->dev, "Resuming device\n");
 	spin_lock_irqsave(&kaweth->device_lock, flags);
 	kaweth->status &= ~KAWETH_STATUS_SUSPENDING;
 	spin_unlock_irqrestore(&kaweth->device_lock, flags);
@@ -1003,36 +1005,37 @@ static int kaweth_probe(
 		const struct usb_device_id *id      /* from id_table */
 	)
 {
-	struct usb_device *dev = interface_to_usbdev(intf);
+	struct device *dev = &intf->dev;
+	struct usb_device *udev = interface_to_usbdev(intf);
 	struct kaweth_device *kaweth;
 	struct net_device *netdev;
 	const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
 	int result = 0;
 
-	dbg("Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x",
-		 dev->devnum,
-		 le16_to_cpu(dev->descriptor.idVendor),
-		 le16_to_cpu(dev->descriptor.idProduct),
-		 le16_to_cpu(dev->descriptor.bcdDevice));
+	dev_dbg(dev,
+		"Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n",
+		udev->devnum, le16_to_cpu(udev->descriptor.idVendor),
+		le16_to_cpu(udev->descriptor.idProduct),
+		le16_to_cpu(udev->descriptor.bcdDevice));
 
-	dbg("Device at %p", dev);
+	dev_dbg(dev, "Device at %p\n", udev);
 
-	dbg("Descriptor length: %x type: %x",
-		 (int)dev->descriptor.bLength,
-		 (int)dev->descriptor.bDescriptorType);
+	dev_dbg(dev, "Descriptor length: %x type: %x\n",
+		(int)udev->descriptor.bLength,
+		(int)udev->descriptor.bDescriptorType);
 
 	netdev = alloc_etherdev(sizeof(*kaweth));
 	if (!netdev)
 		return -ENOMEM;
 
 	kaweth = netdev_priv(netdev);
-	kaweth->dev = dev;
+	kaweth->dev = udev;
 	kaweth->net = netdev;
 
 	spin_lock_init(&kaweth->device_lock);
 	init_waitqueue_head(&kaweth->term_wait);
 
-	dbg("Resetting.");
+	dev_dbg(dev, "Resetting.\n");
 
 	kaweth_reset(kaweth);
 
@@ -1041,17 +1044,17 @@ static int kaweth_probe(
 	 * downloaded. Don't try to do it again, or we'll hang the device.
 	 */
 
-	if (le16_to_cpu(dev->descriptor.bcdDevice) >> 8) {
-		dev_info(&intf->dev, "Firmware present in device.\n");
+	if (le16_to_cpu(udev->descriptor.bcdDevice) >> 8) {
+		dev_info(dev, "Firmware present in device.\n");
 	} else {
 		/* Download the firmware */
-		dev_info(&intf->dev, "Downloading firmware...\n");
+		dev_info(dev, "Downloading firmware...\n");
 		kaweth->firmware_buf = (__u8 *)__get_free_page(GFP_KERNEL);
 		if ((result = kaweth_download_firmware(kaweth,
 						      "kaweth/new_code.bin",
 						      100,
 						      2)) < 0) {
-			dev_err(&intf->dev, "Error downloading firmware (%d)\n",
+			dev_err(dev, "Error downloading firmware (%d)\n",
 				result);
 			goto err_fw;
 		}
@@ -1060,8 +1063,7 @@ static int kaweth_probe(
 						      "kaweth/new_code_fix.bin",
 						      100,
 						      3)) < 0) {
-			dev_err(&intf->dev,
-				"Error downloading firmware fix (%d)\n",
+			dev_err(dev, "Error downloading firmware fix (%d)\n",
 				result);
 			goto err_fw;
 		}
@@ -1070,8 +1072,7 @@ static int kaweth_probe(
 						      "kaweth/trigger_code.bin",
 						      126,
 						      2)) < 0) {
-			dev_err(&intf->dev,
-				"Error downloading trigger code (%d)\n",
+			dev_err(dev, "Error downloading trigger code (%d)\n",
 				result);
 			goto err_fw;
 
@@ -1081,19 +1082,18 @@ static int kaweth_probe(
 						      "kaweth/trigger_code_fix.bin",
 						      126,
 						      3)) < 0) {
-			dev_err(&intf->dev, "Error downloading trigger code fix (%d)\n", result);
+			dev_err(dev, "Error downloading trigger code fix (%d)\n", result);
 			goto err_fw;
 		}
 
 
 		if ((result = kaweth_trigger_firmware(kaweth, 126)) < 0) {
-			dev_err(&intf->dev, "Error triggering firmware (%d)\n",
-				result);
+			dev_err(dev, "Error triggering firmware (%d)\n", result);
 			goto err_fw;
 		}
 
 		/* Device will now disappear for a moment...  */
-		dev_info(&intf->dev, "Firmware loaded.  I'll be back...\n");
+		dev_info(dev, "Firmware loaded.  I'll be back...\n");
 err_fw:
 		free_page((unsigned long)kaweth->firmware_buf);
 		free_netdev(netdev);
@@ -1103,29 +1103,29 @@ err_fw:
 	result = kaweth_read_configuration(kaweth);
 
 	if(result < 0) {
-		dev_err(&intf->dev, "Error reading configuration (%d), no net device created\n", result);
+		dev_err(dev, "Error reading configuration (%d), no net device created\n", result);
 		goto err_free_netdev;
 	}
 
-	dev_info(&intf->dev, "Statistics collection: %x\n", kaweth->configuration.statistics_mask);
-	dev_info(&intf->dev, "Multicast filter limit: %x\n", kaweth->configuration.max_multicast_filters & ((1 << 15) - 1));
-	dev_info(&intf->dev, "MTU: %d\n", le16_to_cpu(kaweth->configuration.segment_size));
-	dev_info(&intf->dev, "Read MAC address %pM\n", kaweth->configuration.hw_addr);
+	dev_info(dev, "Statistics collection: %x\n", kaweth->configuration.statistics_mask);
+	dev_info(dev, "Multicast filter limit: %x\n", kaweth->configuration.max_multicast_filters & ((1 << 15) - 1));
+	dev_info(dev, "MTU: %d\n", le16_to_cpu(kaweth->configuration.segment_size));
+	dev_info(dev, "Read MAC address %pM\n", kaweth->configuration.hw_addr);
 
 	if(!memcmp(&kaweth->configuration.hw_addr,
                    &bcast_addr,
 		   sizeof(bcast_addr))) {
-		dev_err(&intf->dev, "Firmware not functioning properly, no net device created\n");
+		dev_err(dev, "Firmware not functioning properly, no net device created\n");
 		goto err_free_netdev;
 	}
 
 	if(kaweth_set_urb_size(kaweth, KAWETH_BUF_SIZE) < 0) {
-		dbg("Error setting URB size");
+		dev_dbg(dev, "Error setting URB size\n");
 		goto err_free_netdev;
 	}
 
 	if(kaweth_set_sofs_wait(kaweth, KAWETH_SOFS_TO_WAIT) < 0) {
-		dev_err(&intf->dev, "Error setting SOFS wait\n");
+		dev_err(dev, "Error setting SOFS wait\n");
 		goto err_free_netdev;
 	}
 
@@ -1135,11 +1135,11 @@ err_fw:
                                            KAWETH_PACKET_FILTER_MULTICAST);
 
 	if(result < 0) {
-		dev_err(&intf->dev, "Error setting receive filter\n");
+		dev_err(dev, "Error setting receive filter\n");
 		goto err_free_netdev;
 	}
 
-	dbg("Initializing net device.");
+	dev_dbg(dev, "Initializing net device.\n");
 
 	kaweth->intf = intf;
 
@@ -1181,20 +1181,20 @@ err_fw:
 
 #if 0
 // dma_supported() is deeply broken on almost all architectures
-	if (dma_supported (&intf->dev, 0xffffffffffffffffULL))
+	if (dma_supported (dev, 0xffffffffffffffffULL))
 		kaweth->net->features |= NETIF_F_HIGHDMA;
 #endif
 
-	SET_NETDEV_DEV(netdev, &intf->dev);
+	SET_NETDEV_DEV(netdev, dev);
 	if (register_netdev(netdev) != 0) {
-		dev_err(&intf->dev, "Error registering netdev.\n");
+		dev_err(dev, "Error registering netdev.\n");
 		goto err_intfdata;
 	}
 
-	dev_info(&intf->dev, "kaweth interface created at %s\n",
+	dev_info(dev, "kaweth interface created at %s\n",
 		 kaweth->net->name);
 
-	dbg("Kaweth probe returning.");
+	dev_dbg(dev, "Kaweth probe returning.\n");
 
 	return 0;
 
@@ -1232,7 +1232,7 @@ static void kaweth_disconnect(struct usb_interface *intf)
 	}
 	netdev = kaweth->net;
 
-	dbg("Unregistering net device");
+	netdev_dbg(kaweth->net, "Unregistering net device\n");
 	unregister_netdev(netdev);
 
 	usb_free_urb(kaweth->rx_urb);
diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c
index 28c4d51..aad7abe 100644
--- a/drivers/net/usb/net1080.c
+++ b/drivers/net/usb/net1080.c
@@ -156,11 +156,11 @@ static void nc_dump_registers(struct usbnet *dev)
 	u16	*vp = kmalloc(sizeof (u16));
 
 	if (!vp) {
-		dbg("no memory?");
+		netdev_dbg(dev->net, "no memory?\n");
 		return;
 	}
 
-	dbg("%s registers:", dev->net->name);
+	netdev_dbg(dev->net, "registers:\n");
 	for (reg = 0; reg < 0x20; reg++) {
 		int retval;
 
@@ -172,11 +172,10 @@ static void nc_dump_registers(struct usbnet *dev)
 
 		retval = nc_register_read(dev, reg, vp);
 		if (retval < 0)
-			dbg("%s reg [0x%x] ==> error %d",
-				dev->net->name, reg, retval);
+			netdev_dbg(dev->net, "reg [0x%x] ==> error %d\n",
+				   reg, retval);
 		else
-			dbg("%s reg [0x%x] = 0x%x",
-				dev->net->name, reg, *vp);
+			netdev_dbg(dev->net, "reg [0x%x] = 0x%x\n", reg, *vp);
 	}
 	kfree(vp);
 }
@@ -300,15 +299,15 @@ static int net1080_reset(struct usbnet *dev)
 	// nc_dump_registers(dev);
 
 	if ((retval = nc_register_read(dev, REG_STATUS, vp)) < 0) {
-		dbg("can't read %s-%s status: %d",
-			dev->udev->bus->bus_name, dev->udev->devpath, retval);
+		netdev_dbg(dev->net, "can't read %s-%s status: %d\n",
+			   dev->udev->bus->bus_name, dev->udev->devpath, retval);
 		goto done;
 	}
 	status = *vp;
 	nc_dump_status(dev, status);
 
 	if ((retval = nc_register_read(dev, REG_USBCTL, vp)) < 0) {
-		dbg("can't read USBCTL, %d", retval);
+		netdev_dbg(dev->net, "can't read USBCTL, %d\n", retval);
 		goto done;
 	}
 	usbctl = *vp;
@@ -318,7 +317,7 @@ static int net1080_reset(struct usbnet *dev)
 			USBCTL_FLUSH_THIS | USBCTL_FLUSH_OTHER);
 
 	if ((retval = nc_register_read(dev, REG_TTL, vp)) < 0) {
-		dbg("can't read TTL, %d", retval);
+		netdev_dbg(dev->net, "can't read TTL, %d\n", retval);
 		goto done;
 	}
 	ttl = *vp;
@@ -326,7 +325,7 @@ static int net1080_reset(struct usbnet *dev)
 
 	nc_register_write(dev, REG_TTL,
 			MK_TTL(NC_READ_TTL_MS, TTL_OTHER(ttl)) );
-	dbg("%s: assigned TTL, %d ms", dev->net->name, NC_READ_TTL_MS);
+	netdev_dbg(dev->net, "assigned TTL, %d ms\n", NC_READ_TTL_MS);
 
 	netif_info(dev, link, dev->net, "port %c, peer %sconnected\n",
 		   (status & STATUS_PORT_A) ? 'A' : 'B',
@@ -350,7 +349,7 @@ static int net1080_check_connect(struct usbnet *dev)
 	status = *vp;
 	kfree(vp);
 	if (retval != 0) {
-		dbg("%s net1080_check_conn read - %d", dev->net->name, retval);
+		netdev_dbg(dev->net, "net1080_check_conn read - %d\n", retval);
 		return retval;
 	}
 	if ((status & STATUS_CONN_OTHER) != STATUS_CONN_OTHER)
@@ -422,8 +421,9 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	if (!(skb->len & 0x01)) {
 #ifdef DEBUG
 		struct net_device	*net = dev->net;
-		dbg("rx framesize %d range %d..%d mtu %d", skb->len,
-			net->hard_header_len, dev->hard_mtu, net->mtu);
+		netdev_dbg(dev->net, "rx framesize %d range %d..%d mtu %d\n",
+			   skb->len, net->hard_header_len, dev->hard_mtu,
+			   net->mtu);
 #endif
 		dev->net->stats.rx_frame_errors++;
 		nc_ensure_sync(dev);
@@ -435,17 +435,17 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	packet_len = le16_to_cpup(&header->packet_len);
 	if (FRAMED_SIZE(packet_len) > NC_MAX_PACKET) {
 		dev->net->stats.rx_frame_errors++;
-		dbg("packet too big, %d", packet_len);
+		netdev_dbg(dev->net, "packet too big, %d\n", packet_len);
 		nc_ensure_sync(dev);
 		return 0;
 	} else if (hdr_len < MIN_HEADER) {
 		dev->net->stats.rx_frame_errors++;
-		dbg("header too short, %d", hdr_len);
+		netdev_dbg(dev->net, "header too short, %d\n", hdr_len);
 		nc_ensure_sync(dev);
 		return 0;
 	} else if (hdr_len > MIN_HEADER) {
 		// out of band data for us?
-		dbg("header OOB, %d bytes", hdr_len - MIN_HEADER);
+		netdev_dbg(dev->net, "header OOB, %d bytes\n", hdr_len - MIN_HEADER);
 		nc_ensure_sync(dev);
 		// switch (vendor/product ids) { ... }
 	}
@@ -458,23 +458,23 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	if ((packet_len & 0x01) == 0) {
 		if (skb->data [packet_len] != PAD_BYTE) {
 			dev->net->stats.rx_frame_errors++;
-			dbg("bad pad");
+			netdev_dbg(dev->net, "bad pad\n");
 			return 0;
 		}
 		skb_trim(skb, skb->len - 1);
 	}
 	if (skb->len != packet_len) {
 		dev->net->stats.rx_frame_errors++;
-		dbg("bad packet len %d (expected %d)",
-			skb->len, packet_len);
+		netdev_dbg(dev->net, "bad packet len %d (expected %d)\n",
+			   skb->len, packet_len);
 		nc_ensure_sync(dev);
 		return 0;
 	}
 	if (header->packet_id != get_unaligned(&trailer->packet_id)) {
 		dev->net->stats.rx_fifo_errors++;
-		dbg("(2+ dropped) rx packet_id mismatch 0x%x 0x%x",
-			le16_to_cpu(header->packet_id),
-			le16_to_cpu(trailer->packet_id));
+		netdev_dbg(dev->net, "(2+ dropped) rx packet_id mismatch 0x%x 0x%x\n",
+			   le16_to_cpu(header->packet_id),
+			   le16_to_cpu(trailer->packet_id));
 		return 0;
 	}
 #if 0
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 0e2c92e..5f39a3b 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -275,7 +275,7 @@ static int rtl8150_set_mac_address(struct net_device *netdev, void *p)
 		return -EBUSY;
 
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
-	dbg("%s: Setting MAC address to %pM\n", netdev->name, netdev->dev_addr);
+	netdev_dbg(netdev, "Setting MAC address to %pM\n", netdev->dev_addr);
 	/* Set the IDR registers. */
 	set_registers(dev, IDR, netdev->addr_len, netdev->dev_addr);
 #ifdef EEPROM_WRITE
@@ -503,12 +503,12 @@ static void intr_callback(struct urb *urb)
 	if ((d[INT_MSR] & MSR_LINK) == 0) {
 		if (netif_carrier_ok(dev->netdev)) {
 			netif_carrier_off(dev->netdev);
-			dbg("%s: LINK LOST\n", __func__);
+			netdev_dbg(dev->netdev, "%s: LINK LOST\n", __func__);
 		}
 	} else {
 		if (!netif_carrier_ok(dev->netdev)) {
 			netif_carrier_on(dev->netdev);
-			dbg("%s: LINK CAME BACK\n", __func__);
+			netdev_dbg(dev->netdev, "%s: LINK CAME BACK\n", __func__);
 		}
 	}
 
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: HTB vs CoDel performance
From: Rick Jones @ 2012-09-19 17:00 UTC (permalink / raw)
  To: Lin Ming; +Cc: Eric Dumazet, networking
In-Reply-To: <CAF1ivSYQiXBOtzZdDUKEqWWsW5cgLgSkLs88Dvkppb02yN6wTg@mail.gmail.com>

On 09/18/2012 06:26 PM, Lin Ming wrote:
> On Tue, Sep 18, 2012 at 6:15 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> Are you really cpu limited ? You might hit some clocks artifacts.
>
> Did you mean the cpu speed? It's an ARMv5 processor.
> BogoMIPS	: 1196.03

At the risk of typing into Eric's keyboard, he was asking if you were 
saturating the CPU - was it getting to 100% utilization, that sort of thing.

>> rate limiting to 1Gbps probably need high resolution timers.
>
> High resolution timer is enabled.

When you are running your tests, what sort of CPU utilization do you see 
on the CPU of your router with HTB on vs off.  Some "quick and dirty" 
netperf tests on a Centrino-based laptop suggested that as an end 
system at least, HTB at 1 GbE (using your tc commands) increases service 
demand (what netperf calculates as CPU consumed per unit of work 
performed) ~15% for bulk transfer (netperf TCP_STREAM) and ~18% for 
small packet request/response (TCP_RR).

rick jones

^ permalink raw reply

* [PATCH net-next v1] net: use a per task frag allocator
From: Eric Dumazet @ 2012-09-19 16:56 UTC (permalink / raw)
  To: David Miller; +Cc: linux-kernel, netdev

From: Eric Dumazet <edumazet@google.com>

We currently use a per socket page reserve for tcp_sendmsg() operations.

This page is used to build fragments for skbs.

It's done to increase the probability of coalescing small write() calls
into single segments in skbs still in the write queue (not yet sent).

But it wastes a lot of memory for applications handling many mostly
idle sockets, since each socket holds one page in sk->sk_sndmsg_page

Its also quite inefficient to build TSO packets of 64KB, because we need
about 16 pages per skb on arches where PAGE_SIZE = 4096, so we hit
page allocator more than wanted.

This patch switches this frag allocator from socket to task structure,
and uses bigger pages.

(up to 32768 bytes per frag, thats order-3 pages on x86)

This increases TCP stream performance by 20% on loopback device,
but also benefits on other network devices, since 8x less frags are
mapped on transmit and unmapped on tx completion.

It's possible some TSO-enabled hardware can't cope with bigger fragments,
but their ndo_start_xmit() should already handle this, splitting a
fragment in sub fragments, since some arches have PAGE_SIZE=65536

Successfully tested on various ethernet devices.
(ixgbe, igb, bnx2x, tg3, mellanox mlx4)

Followup patches can use this infrastructure in two other spots
and get rid of the socket sk_sndmsg_page.

Open for discussion : Should we fallback to smaller pages
if order-3 page allocations fail ?

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/sched.h |    6 ++++++
 include/net/sock.h    |   12 +++++++++---
 kernel/exit.c         |    3 +++
 kernel/fork.c         |    1 +
 net/ipv4/tcp.c        |   34 +++++++++++++++++-----------------
 net/ipv4/tcp_ipv4.c   |    4 +---
 6 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b8c8664..ad61100 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1530,6 +1530,12 @@ struct task_struct {
 	 * cache last used pipe for splice
 	 */
 	struct pipe_inode_info *splice_pipe;
+	/*
+	 * cache for page frag allocator
+	 */
+	struct page *sndmsg_page;
+	unsigned int sndmsg_off;
+
 #ifdef	CONFIG_TASK_DELAY_ACCT
 	struct task_delay_info *delays;
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 181b711..431122c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -247,8 +247,8 @@ struct cg_proto;
   *	@sk_stamp: time stamp of last packet received
   *	@sk_socket: Identd and reporting IO signals
   *	@sk_user_data: RPC layer private data
-  *	@sk_sndmsg_page: cached page for sendmsg
-  *	@sk_sndmsg_off: cached offset for sendmsg
+  *	@sk_sndmsg_page: cached page for splice/ip6_append_data()
+  *	@sk_sndmsg_off: cached offset for splice/ip6_append_data()
   *	@sk_peek_off: current peek_offset value
   *	@sk_send_head: front of stuff to transmit
   *	@sk_security: used by security modules
@@ -2034,11 +2034,17 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
 
 struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
 
+/* On 32bit arches, an skb frag is limited to 2^15, because
+ * (struct skb_frag_struct)->size/offset are u16
+ */
+#define SNDMSG_PAGE_ORDER	min(get_order(32768), PAGE_ALLOC_COSTLY_ORDER)
+#define SNDMSG_PAGE_SIZE	(PAGE_SIZE << SNDMSG_PAGE_ORDER)
+
 static inline struct page *sk_stream_alloc_page(struct sock *sk)
 {
 	struct page *page = NULL;
 
-	page = alloc_pages(sk->sk_allocation, 0);
+	page = alloc_pages(sk->sk_allocation | __GFP_COMP, SNDMSG_PAGE_ORDER);
 	if (!page) {
 		sk_enter_memory_pressure(sk);
 		sk_stream_moderate_sndbuf(sk);
diff --git a/kernel/exit.c b/kernel/exit.c
index f65345f..487b81a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1046,6 +1046,9 @@ void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	if (tsk->sndmsg_page)
+		put_page(tsk->sndmsg_page);
+
 	validate_creds_for_do_exit(tsk);
 
 	preempt_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index 2c8857e..60b58af 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -330,6 +330,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	tsk->btrace_seq = 0;
 #endif
 	tsk->splice_pipe = NULL;
+	tsk->sndmsg_page = NULL;
 
 	account_kernel_stack(ti, 1);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index df83d74..7942d82 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1152,16 +1152,16 @@ new_segment:
 			} else {
 				bool merge = false;
 				int i = skb_shinfo(skb)->nr_frags;
-				struct page *page = sk->sk_sndmsg_page;
+				struct page *page = current->sndmsg_page;
 				int off;
 
 				if (page && page_count(page) == 1)
-					sk->sk_sndmsg_off = 0;
+					current->sndmsg_off = 0;
 
-				off = sk->sk_sndmsg_off;
+				off = current->sndmsg_off;
 
 				if (skb_can_coalesce(skb, i, page, off) &&
-				    off != PAGE_SIZE) {
+				    off != SNDMSG_PAGE_SIZE) {
 					/* We can extend the last page
 					 * fragment. */
 					merge = true;
@@ -1173,16 +1173,16 @@ new_segment:
 					tcp_mark_push(tp, skb);
 					goto new_segment;
 				} else if (page) {
-					if (off == PAGE_SIZE) {
+					if (off == SNDMSG_PAGE_SIZE) {
 						put_page(page);
-						sk->sk_sndmsg_page = page = NULL;
+						current->sndmsg_page = page = NULL;
 						off = 0;
 					}
 				} else
 					off = 0;
 
-				if (copy > PAGE_SIZE - off)
-					copy = PAGE_SIZE - off;
+				if (copy > SNDMSG_PAGE_SIZE - off)
+					copy = SNDMSG_PAGE_SIZE - off;
 
 				if (!sk_wmem_schedule(sk, copy))
 					goto wait_for_memory;
@@ -1198,12 +1198,12 @@ new_segment:
 				err = skb_copy_to_page_nocache(sk, from, skb,
 							       page, off, copy);
 				if (err) {
-					/* If this page was new, give it to the
-					 * socket so it does not get leaked.
+					/* If this page was new, remember it
+					 * so it does not get leaked.
 					 */
-					if (!sk->sk_sndmsg_page) {
-						sk->sk_sndmsg_page = page;
-						sk->sk_sndmsg_off = 0;
+					if (!current->sndmsg_page) {
+						current->sndmsg_page = page;
+						current->sndmsg_off = 0;
 					}
 					goto do_error;
 				}
@@ -1213,15 +1213,15 @@ new_segment:
 					skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
 				} else {
 					skb_fill_page_desc(skb, i, page, off, copy);
-					if (sk->sk_sndmsg_page) {
+					if (current->sndmsg_page) {
 						get_page(page);
-					} else if (off + copy < PAGE_SIZE) {
+					} else if (off + copy < SNDMSG_PAGE_SIZE) {
 						get_page(page);
-						sk->sk_sndmsg_page = page;
+						current->sndmsg_page = page;
 					}
 				}
 
-				sk->sk_sndmsg_off = off + copy;
+				current->sndmsg_off = off + copy;
 			}
 
 			if (!copied)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e64abed..e457d65 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2196,9 +2196,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	if (inet_csk(sk)->icsk_bind_hash)
 		inet_put_port(sk);
 
-	/*
-	 * If sendmsg cached page exists, toss it.
-	 */
+	/* If cached page exists, toss it. */
 	if (sk->sk_sndmsg_page) {
 		__free_page(sk->sk_sndmsg_page);
 		sk->sk_sndmsg_page = NULL;

^ permalink raw reply related

* Re: Davicom DM9000C driver
From: Bruno Prémont @ 2012-09-19 16:39 UTC (permalink / raw)
  To: Allen Huang
  Cc: linux-kernel, 'Michael Chen', 'Charles',
	'Joseph Chang', Linux network list
In-Reply-To: <20120919055333.3ECF95F94D@mail.davicom.com.tw>

[-- Attachment #1: Type: text/plain, Size: 1252 bytes --]

Hi Allen,

[CCing netdev, keeping .c/.h source attached]

On Wed, 19 September 2012 Allen Huang (黃偉格)  <allen_huang@davicom.com.tw> wrote:
> I'm Allen Huang from Davicom. We are hereby opensourcing the linux
> driver for our DM9000C. 

Ah, from looking at the code DM9000C looks like it is some network chip,
what platforms does it show up on?

> We would appreciate any comments that you have on our driver and
> whether it is ready to go into the kernel. Please see DM9000C driver in the
> attachment.

It would be nice if you could include the changes to Kconfig/Makefile
including a description as it's not clear on what kind of devices the
chip can be encountered.

Also please properly tag the source files attachments as text/plain.


With a quick glance at the code:
- your comments are often single-line C++ style //
  and should be changed to /*  */.

- comments that look like author/revision tags

- there are blocks of code commented or inside if (1) {}

- some printk calls missing KERN_ severity tag

- indentation issues


Please fix the above issues (and have a look at scripts/checkpatch.pl) and
respin (as a patch), taking care to CC netdev so network people have a
chance to see it.

Bruno

[-- Attachment #2: dm9000_KT2.6.31.c --]
[-- Type: text/plain, Size: 37105 bytes --]

/*
 *      Davicom DM9000 Fast Ethernet driver for Linux.
 * 	Copyright (C) 1997  Sten Wang
 *
 * 	This program is free software; you can redistribute it and/or
 * 	modify it under the terms of the GNU General Public License
 * 	as published by the Free Software Foundation; either version 2
 * 	of the License, or (at your option) any later version.
 *
 * 	This program is distributed in the hope that it will be useful,
 * 	but WITHOUT ANY WARRANTY; without even the implied warranty of
 * 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * 	GNU General Public License for more details.
 *
 * (C) Copyright 1997-1998 DAVICOM Semiconductor,Inc. All Rights Reserved.
 *
 * Additional updates, Copyright:
 *	Ben Dooks <ben@simtec.co.uk>
 *	Sascha Hauer <s.hauer@pengutronix.de>
 *  
 * 2010.07.20 V_R1 1. Write PHY Reg27 = 0xE100
 *                 2. Just enable PHY once after GPIO setting in dm9000_init_dm9000()
 *                 3. Remove power down PHY in dm9000_shutdown()
 * 2010.07.20 V_R2 1. Delay 20ms after PHY power on
 *                 2. Reset PHY after PHY power on in dm9000_init_dm9000()
 * 2012.06.05 KT2.6.31_R2 1. Add the solution to fix the power-on FIFO data bytes shift issue! (Wr NCR 0x03)
 */

#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/version.h>
#include <linux/spinlock.h>
#include <linux/crc32.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/dm9000.h>
#include <linux/delay.h>
#include <linux/platform_device.h>
#include <linux/irq.h>

#include <asm/delay.h>
#include <asm/irq.h>
#include <asm/io.h>

#include "dm9000.h"

/* Board/System/Debug information/definition ---------------- */

#define DM9000_PHY		0x40	/* PHY address 0x01 */

#define CARDNAME	"dm9000"
#define DRV_VERSION	"2.6.31"

/*
 * Transmit timeout, default 5 seconds.
 */
static int watchdog = 5000;
module_param(watchdog, int, 0400);
MODULE_PARM_DESC(watchdog, "transmit timeout in milliseconds");

/* DM9000 register address locking.
 *
 * The DM9000 uses an address register to control where data written
 * to the data register goes. This means that the address register
 * must be preserved over interrupts or similar calls.
 *
 * During interrupt and other critical calls, a spinlock is used to
 * protect the system, but the calls themselves save the address
 * in the address register in case they are interrupting another
 * access to the device.
 *
 * For general accesses a lock is provided so that calls which are
 * allowed to sleep are serialised so that the address register does
 * not need to be saved. This lock also serves to serialise access
 * to the EEPROM and PHY access registers which are shared between
 * these two devices.
 */

/* The driver supports the original DM9000E, and now the two newer
 * devices, DM9000A and DM9000B.
 */

/*
 * Chip variant handled by this driver instance.
 *
 * The type decides how link changes are tracked: dm9000_schedule_poll()
 * only schedules PHY polling for TYPE_DM9000E, while
 * dm9000_init_dm9000() adds IMR_LNKCHNG to the interrupt mask for every
 * other type.
 */
enum dm9000_type {
	TYPE_DM9000E,	/* original DM9000 */
	TYPE_DM9000A,
	TYPE_DM9000B
};

/* Structure/enum declaration ------------------------------- */
/*
 * Per-device private state, stored in the net_device private area
 * (see to_dm9000_board()).
 */
typedef struct board_info {

	void __iomem	*io_addr;	/* Register I/O base address */
	void __iomem	*io_data;	/* Data I/O address */
	u16		 irq;		/* IRQ */

	/* packets written to TX RAM and not yet reported done
	 * (incremented in dm9000_start_xmit, decremented in dm9000_tx_done)
	 */
	u16		tx_pkt_cnt;
	/* length of the second (queued) TX packet, sent from dm9000_tx_done */
	u16		queue_pkt_len;
	u16		queue_start_addr;
	u16		dbug_cnt;
	u8		io_mode;		/* 0:word, 2:byte */
	u8		phy_addr;
	u8		imr_all;	/* interrupt mask written to DM9000_IMR at init */

	unsigned int	flags;		/* DM9000_PLATF_* flags */
	unsigned int	in_suspend :1;
	int		debug_level;	/* threshold checked by dm9000_dbg() */

	enum dm9000_type type;

	/* width-specific block transfer helpers, selected by dm9000_set_io() */
	void (*inblk)(void __iomem *port, void *data, int length);
	void (*outblk)(void __iomem *port, void *data, int length);
	void (*dumpblk)(void __iomem *port, int length);

	struct device	*dev;	     /* parent device */

	struct resource	*addr_res;   /* resources found */
	struct resource *data_res;
	struct resource	*addr_req;   /* resources requested */
	struct resource *data_req;
	struct resource *irq_res;

	struct mutex	 addr_lock;	/* phy and eeprom access lock */

	struct delayed_work phy_poll;	/* link polling work, see dm9000_poll_work() */
	struct net_device  *ndev;

	spinlock_t	lock;		/* held around address/data register accesses */

	struct mii_if_info mii;
	u32		msg_enable;	/* message level, get/set through ethtool */
} board_info_t;

/*
 * Forward declaration: dm9000_init_dm9000() calls this before any
 * definition of it appears in the file.
 */
static void
dm9000_phy_write(struct net_device *dev,
		 int phyaddr_unused, int reg, int value);

/*
 * debug code
 *
 * dm9000_dbg(db, lev, fmt, ...) emits a dev_dbg() message for db->dev
 * only when @lev is below both the compile-time limit
 * CONFIG_DM9000_DEBUGLEVEL and the per-device db->debug_level.
 *
 * The @db argument is parenthesised so that any pointer expression
 * (not just a plain identifier) can be passed safely.
 */

#define dm9000_dbg(db, lev, msg...) do {		\
	if ((lev) < CONFIG_DM9000_DEBUGLEVEL &&		\
	    (lev) < (db)->debug_level) {		\
		dev_dbg((db)->dev, msg);		\
	}						\
} while (0)

/* Return the driver-private board_info stored in @dev's private area. */
static inline board_info_t *to_dm9000_board(struct net_device *dev)
{
	board_info_t *priv = netdev_priv(dev);

	return priv;
}

/* DM9000 network board routine ---------------------------- */

/*
 * Software-reset the chip: select the NCR register through the address
 * port, then write NCR_RST through the data port, waiting 200us after
 * each access.
 */
static void
dm9000_reset(board_info_t * db)
{
	dev_dbg(db->dev, "resetting device\n");

	/* RESET device */
	writeb(DM9000_NCR, db->io_addr);
	udelay(200);
	writeb(NCR_RST, db->io_data);
	udelay(200);
}

/*
 *   Read one chip register: write the register index @reg to the
 *   address port, then fetch a byte from the data port.
 */
static u8
ior(board_info_t * db, int reg)
{
	u8 val;

	writeb(reg, db->io_addr);
	val = readb(db->io_data);

	return val;
}

/*
 *   Write one chip register: write the register index @reg to the
 *   address port, then write @value to the data port.
 */

static void
iow(board_info_t * db, int reg, int value)
{
	writeb(reg, db->io_addr);
	writeb(value, db->io_data);
}

/* routines for sending block to chip */

/* Send @count bytes from @data to the port @reg, one byte per access. */
static void dm9000_outblk_8bit(void __iomem *reg, void *data, int count)
{
	writesb(reg, data, count);
}

/* Send @count bytes from @data to @reg using 16-bit accesses. */
static void dm9000_outblk_16bit(void __iomem *reg, void *data, int count)
{
	/* round the byte count up to whole 16-bit words */
	int words = (count + 1) >> 1;

	writesw(reg, data, words);
}

/* Send @count bytes from @data to @reg using 32-bit accesses. */
static void dm9000_outblk_32bit(void __iomem *reg, void *data, int count)
{
	/* round the byte count up to whole 32-bit words */
	int dwords = (count + 3) >> 2;

	writesl(reg, data, dwords);
}

/* input block from chip to memory */

/* Read @count bytes from the port @reg into @data, one byte per access. */
static void dm9000_inblk_8bit(void __iomem *reg, void *data, int count)
{
	readsb(reg, data, count);
}


/* Read @count bytes from @reg into @data using 16-bit accesses. */
static void dm9000_inblk_16bit(void __iomem *reg, void *data, int count)
{
	/* round the byte count up to whole 16-bit words */
	int words = (count + 1) >> 1;

	readsw(reg, data, words);
}

/* Read @count bytes from @reg into @data using 32-bit accesses. */
static void dm9000_inblk_32bit(void __iomem *reg, void *data, int count)
{
	/* round the byte count up to whole 32-bit words */
	int dwords = (count + 3) >> 2;

	readsl(reg, data, dwords);
}

/* dump block from chip to null */

/* Read @count bytes from @reg one at a time and discard them. */
static void dm9000_dumpblk_8bit(void __iomem *reg, int count)
{
	int remaining;

	for (remaining = count; remaining > 0; remaining--)
		(void)readb(reg);
}

/* Read and discard @count bytes from @reg using 16-bit accesses. */
static void dm9000_dumpblk_16bit(void __iomem *reg, int count)
{
	/* round the byte count up to whole 16-bit words */
	int words = (count + 1) >> 1;

	while (words > 0) {
		(void)readw(reg);
		words--;
	}
}

/* Read and discard @count bytes from @reg using 32-bit accesses. */
static void dm9000_dumpblk_32bit(void __iomem *reg, int count)
{
	/* round the byte count up to whole 32-bit words */
	int dwords = (count + 3) >> 2;

	while (dwords > 0) {
		(void)readl(reg);
		dwords--;
	}
}

/* dm9000_set_io
 *
 * Install the block transfer helpers (dumpblk/outblk/inblk) matching
 * the given bus width in bytes:
 *   1       -> 8-bit helpers
 *   2 or 3  -> 16-bit helpers (3 also logs a debug message)
 *   other   -> 32-bit helpers
 */

static void dm9000_set_io(struct board_info *db, int byte_width)
{
	if (byte_width == 1) {
		db->dumpblk = dm9000_dumpblk_8bit;
		db->outblk  = dm9000_outblk_8bit;
		db->inblk   = dm9000_inblk_8bit;
		return;
	}

	if (byte_width == 2 || byte_width == 3) {
		if (byte_width == 3)
			dev_dbg(db->dev, ": 3 byte IO, falling back to 16bit\n");

		db->dumpblk = dm9000_dumpblk_16bit;
		db->outblk  = dm9000_outblk_16bit;
		db->inblk   = dm9000_inblk_16bit;
		return;
	}

	/* 4 bytes, and anything unrecognised */
	db->dumpblk = dm9000_dumpblk_32bit;
	db->outblk  = dm9000_outblk_32bit;
	db->inblk   = dm9000_inblk_32bit;
}

/*
 * Queue the PHY poll work to run in two seconds. Only done for
 * TYPE_DM9000E; for any other chip type this is a no-op.
 */
static void dm9000_schedule_poll(board_info_t *db)
{
	if (db->type != TYPE_DM9000E)
		return;

	schedule_delayed_work(&db->phy_poll, HZ * 2);
}

/*
 * ioctl entry point: forwards MII ioctls to generic_mii_ioctl().
 * Returns -EINVAL when the interface is not running.
 */
static int dm9000_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
{
	board_info_t *priv;

	if (netif_running(dev)) {
		priv = to_dm9000_board(dev);
		return generic_mii_ioctl(&priv->mii, if_mii(req), cmd, NULL);
	}

	return -EINVAL;
}

/* Read chip register @reg while holding db->lock with interrupts disabled. */
static unsigned int
dm9000_read_locked(board_info_t *db, int reg)
{
	unsigned long irqflags;
	unsigned int val;

	spin_lock_irqsave(&db->lock, irqflags);
	val = ior(db, reg);
	spin_unlock_irqrestore(&db->lock, irqflags);

	return val;
}

/*
 * Wait for a pending EEPROM operation to finish.
 *
 * Polls EPCR until EPCR_ERRE clears, sleeping 1 ms (msleep, so possibly
 * longer) between polls. Always returns 0, including when the poll
 * budget is exhausted. May sleep.
 */
static int dm9000_wait_eeprom(board_info_t *db)
{
	unsigned int status;
	/* NOTE: with the "timeout-- < 0" test below the loop gives up
	 * only after the 10th msleep(1), not after 8 ms.
	 */
	int timeout = 8;

	/* The DM9000 data sheets say we should be able to
	 * poll the ERRE bit in EPCR to wait for the EEPROM
	 * operation. From testing several chips, this bit
	 * does not seem to work.
	 *
	 * We attempt to use the bit, but fall back to the
	 * timeout (which is why we do not return an error
	 * on expiry) to say that the EEPROM operation has
	 * completed.
	 */

	while (1) {
		status = dm9000_read_locked(db, DM9000_EPCR);

		if ((status & EPCR_ERRE) == 0)
			break;

		msleep(1);

		if (timeout-- < 0) {
			dev_dbg(db->dev, "timeout waiting EEPROM\n");
			break;
		}
	}

	return 0;
}

/*
 *  Read one 16-bit word from the EEPROM.
 *
 *  @offset is the value written to the EPAR register (callers in this
 *  file pass a word index); @to receives two bytes: to[0] from EPDRL
 *  and to[1] from EPDRH. When the platform flags say there is no
 *  EEPROM, both bytes are set to 0xff and the hardware is not touched.
 *
 *  Takes db->addr_lock and sleeps, so it must be called from a context
 *  that may sleep.
 */
static void
dm9000_read_eeprom(board_info_t *db, int offset, u8 *to)
{
	unsigned long flags;

	if (db->flags & DM9000_PLATF_NO_EEPROM) {
		to[0] = 0xff;
		to[1] = 0xff;
		return;
	}

	mutex_lock(&db->addr_lock);

	spin_lock_irqsave(&db->lock, flags);

	/* select the word and start the read command */
	iow(db, DM9000_EPAR, offset);
	iow(db, DM9000_EPCR, EPCR_ERPRR);

	spin_unlock_irqrestore(&db->lock, flags);

	dm9000_wait_eeprom(db);

	/* delay for at-least 150uS */
	msleep(1);

	spin_lock_irqsave(&db->lock, flags);

	/* clear the command before fetching the data registers */
	iow(db, DM9000_EPCR, 0x0);

	to[0] = ior(db, DM9000_EPDRL);
	to[1] = ior(db, DM9000_EPDRH);

	spin_unlock_irqrestore(&db->lock, flags);

	mutex_unlock(&db->addr_lock);
}

/*
 * Write one 16-bit word to the EEPROM (SROM).
 *
 * @offset is the value written to the EPAR register (callers in this
 * file pass a word index); data[0] goes to EPDRL and data[1] to EPDRH.
 * Does nothing when the platform flags say there is no EEPROM.
 *
 * Takes db->addr_lock and sleeps, so it must be called from a context
 * that may sleep.
 */
static void
dm9000_write_eeprom(board_info_t *db, int offset, u8 *data)
{
	unsigned long flags;

	if (db->flags & DM9000_PLATF_NO_EEPROM)
		return;

	mutex_lock(&db->addr_lock);

	/* load address and data, then start the write command */
	spin_lock_irqsave(&db->lock, flags);
	iow(db, DM9000_EPAR, offset);
	iow(db, DM9000_EPDRH, data[1]);
	iow(db, DM9000_EPDRL, data[0]);
	iow(db, DM9000_EPCR, EPCR_WEP | EPCR_ERPRW);
	spin_unlock_irqrestore(&db->lock, flags);

	dm9000_wait_eeprom(db);

	/*
	 * Wait at least 150uS to clear. We already hold a mutex and have
	 * just slept in dm9000_wait_eeprom(), so sleep here rather than
	 * busy-waiting with mdelay(); this matches dm9000_read_eeprom().
	 */
	msleep(1);

	spin_lock_irqsave(&db->lock, flags);
	iow(db, DM9000_EPCR, 0);
	spin_unlock_irqrestore(&db->lock, flags);

	mutex_unlock(&db->addr_lock);
}

/* ethtool ops */

/*
 * ethtool get_drvinfo: report driver name, driver version and the
 * platform device name as bus info.
 *
 * The destination fields of struct ethtool_drvinfo are fixed-size
 * arrays, so every copy is bounded by the size of its destination
 * (the platform device name in particular is not under our control).
 */
static void dm9000_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *info)
{
	board_info_t *dm = to_dm9000_board(dev);

	strlcpy(info->driver, CARDNAME, sizeof(info->driver));
	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
	strlcpy(info->bus_info, to_platform_device(dm->dev)->name,
		sizeof(info->bus_info));
}

static u32 dm9000_get_msglevel(struct net_device *dev)
{
	board_info_t *dm = to_dm9000_board(dev);

	return dm->msg_enable;
}

/* ethtool set_msglevel: store @value as the new message-enable mask. */
static void dm9000_set_msglevel(struct net_device *dev, u32 value)
{
	to_dm9000_board(dev)->msg_enable = value;
}

static int dm9000_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	board_info_t *dm = to_dm9000_board(dev);

	mii_ethtool_gset(&dm->mii, cmd);
	return 0;
}

static int dm9000_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	board_info_t *dm = to_dm9000_board(dev);

	return mii_ethtool_sset(&dm->mii, cmd);
}

static int dm9000_nway_reset(struct net_device *dev)
{
	board_info_t *dm = to_dm9000_board(dev);
	return mii_nway_restart(&dm->mii);
}

/*
 * ethtool get_link: report link state.
 *
 * With DM9000_PLATF_EXT_PHY set, the MII helper is asked; otherwise
 * the NSR_LINKST bit of the NSR register is returned as 0 or 1.
 */
static u32 dm9000_get_link(struct net_device *dev)
{
	board_info_t *dm = to_dm9000_board(dev);

	if (dm->flags & DM9000_PLATF_EXT_PHY)
		return mii_link_ok(&dm->mii);

	return (dm9000_read_locked(dm, DM9000_NSR) & NSR_LINKST) ? 1 : 0;
}

#define DM_EEPROM_MAGIC		(0x444D394B)

/* ethtool get_eeprom_len: this driver always reports a length of 128. */
static int dm9000_get_eeprom_len(struct net_device *dev)
{
	return 128;
}

/*
 * ethtool get_eeprom: read ee->len bytes starting at byte offset
 * ee->offset into @data, one 16-bit word at a time.
 *
 * Returns -EINVAL if offset or length is odd, -ENOENT if the platform
 * has no EEPROM, 0 otherwise. ee->magic is set to DM_EEPROM_MAGIC.
 */
static int dm9000_get_eeprom(struct net_device *dev,
			     struct ethtool_eeprom *ee, u8 *data)
{
	board_info_t *dm = to_dm9000_board(dev);
	int offset = ee->offset;
	int len = ee->len;
	int pos = 0;

	/* EEPROM access is aligned to two bytes */
	if ((offset | len) & 1)
		return -EINVAL;

	if (dm->flags & DM9000_PLATF_NO_EEPROM)
		return -ENOENT;

	ee->magic = DM_EEPROM_MAGIC;

	while (pos < len) {
		dm9000_read_eeprom(dm, (offset + pos) / 2, data + pos);
		pos += 2;
	}

	return 0;
}

/*
 * ethtool set_eeprom: write ee->len bytes from @data starting at byte
 * offset ee->offset, one 16-bit word at a time.
 *
 * Returns -EINVAL if offset or length is odd or ee->magic is not
 * DM_EEPROM_MAGIC, -ENOENT if the platform has no EEPROM, 0 otherwise.
 */
static int dm9000_set_eeprom(struct net_device *dev,
			     struct ethtool_eeprom *ee, u8 *data)
{
	board_info_t *dm = to_dm9000_board(dev);
	int offset = ee->offset;
	int len = ee->len;
	int pos = 0;

	/* EEPROM access is aligned to two bytes */
	if ((offset | len) & 1)
		return -EINVAL;

	if (dm->flags & DM9000_PLATF_NO_EEPROM)
		return -ENOENT;

	if (ee->magic != DM_EEPROM_MAGIC)
		return -EINVAL;

	while (pos < len) {
		dm9000_write_eeprom(dm, (offset + pos) / 2, data + pos);
		pos += 2;
	}

	return 0;
}

/* ethtool operations table wiring up the dm9000_* handlers defined above. */
static const struct ethtool_ops dm9000_ethtool_ops = {
	.get_drvinfo		= dm9000_get_drvinfo,
	.get_settings		= dm9000_get_settings,
	.set_settings		= dm9000_set_settings,
	.get_msglevel		= dm9000_get_msglevel,
	.set_msglevel		= dm9000_set_msglevel,
	.nway_reset		= dm9000_nway_reset,
	.get_link		= dm9000_get_link,
 	.get_eeprom_len		= dm9000_get_eeprom_len,
 	.get_eeprom		= dm9000_get_eeprom,
 	.set_eeprom		= dm9000_set_eeprom,
};

/*
 * Log the link state through dev_info().
 *
 * @carrier: non-zero when the link is up
 * @nsr:     NSR register value supplied by the caller (speed bit)
 *
 * The NCR register is read here (under the lock) to obtain duplex.
 */
static void dm9000_show_carrier(board_info_t *db,
				unsigned carrier, unsigned nsr)
{
	struct net_device *ndev = db->ndev;
	unsigned ncr = dm9000_read_locked(db, DM9000_NCR);
	const char *duplex;
	int mbps;

	if (!carrier) {
		dev_info(db->dev, "%s: link down\n", ndev->name);
		return;
	}

	mbps = (nsr & NSR_SPEED) ? 10 : 100;
	duplex = (ncr & NCR_FDX) ? "full" : "half";

	dev_info(db->dev, "%s: link up, %dMbps, %s-duplex, no LPA\n",
		 ndev->name, mbps, duplex);
}


/*
 * Map a chip type to its one-letter suffix ('e', 'a' or 'b');
 * returns '?' for any other value.
 */
static unsigned char dm9000_type_to_char(enum dm9000_type type)
{
	if (type == TYPE_DM9000E)
		return 'e';
	if (type == TYPE_DM9000A)
		return 'a';
	if (type == TYPE_DM9000B)
		return 'b';

	return '?';
}

/*
 * Delayed-work handler that polls the link state.
 *
 * Reads NSR_LINKST from the NSR register and compares it with the
 * carrier state known to the network stack. On a change it logs the
 * transition and updates the carrier. The work re-arms itself through
 * dm9000_schedule_poll() while the interface is running.
 *
 * The vendor code wrapped this in "if (1) { ... } else
 * mii_check_media(...)", which left the MII branch unreachable; the
 * dead branch and the commented-out flag test
 * (DM9000_PLATF_SIMPLE_PHY / DM9000_PLATF_EXT_PHY) are dropped here
 * without changing what is executed.
 */
static void
dm9000_poll_work(struct work_struct *w)
{
	struct delayed_work *dw = container_of(w, struct delayed_work, work);
	board_info_t *db = container_of(dw, board_info_t, phy_poll);
	struct net_device *ndev = db->ndev;
	unsigned nsr = dm9000_read_locked(db, DM9000_NSR);
	unsigned old_carrier = netif_carrier_ok(ndev) ? 1 : 0;
	unsigned new_carrier = (nsr & NSR_LINKST) ? 1 : 0;

	if (old_carrier != new_carrier) {
		if (new_carrier)
			printk(KERN_INFO "[dm9000%c %s Ethernet Driver, V%s]: Link-Up!!\n",
			       dm9000_type_to_char(db->type), CARDNAME,
			       DRV_VERSION);
		else
			printk(KERN_INFO "[%s Ethernet Driver, V%s]: Link-Down!!\n",
			       CARDNAME, DRV_VERSION);

		if (netif_msg_link(db))
			dm9000_show_carrier(db, new_carrier, nsr);

		if (!new_carrier)
			netif_carrier_off(ndev);
		else
			netif_carrier_on(ndev);
	}

	if (netif_running(ndev))
		dm9000_schedule_poll(db);
}

/* dm9000_release_board
 *
 * release a board, and any mapped resources: unmaps both I/O windows,
 * then releases and frees the requested data and address resources.
 * @pdev is not used by this function.
 */

static void
dm9000_release_board(struct platform_device *pdev, struct board_info *db)
{
	/* unmap our resources */

	iounmap(db->io_addr);
	iounmap(db->io_data);

	/* release the resources */

	release_resource(db->data_req);
	kfree(db->data_req);

	release_resource(db->addr_req);
	kfree(db->addr_req);
}



/*
 *  Set DM9000 multicast address
 *
 *  Under db->lock this programs, in order: the six MAC address bytes
 *  (registers starting at DM9000_PAR), the 64-bit multicast hash table
 *  (registers starting at DM9000_MAR) and finally the receive control
 *  register, with promiscuous / all-multicast bits taken from
 *  dev->flags.
 */
static void
dm9000_hash_table(struct net_device *dev)
{
	board_info_t *db = netdev_priv(dev);
	struct dev_mc_list *mcptr = dev->mc_list;
	int mc_cnt = dev->mc_count;
	int i, oft;
	u32 hash_val;
	u16 hash_table[4];	/* 4 x 16 bits = 64 hash bits */
	u8 rcr = RCR_DIS_LONG | RCR_DIS_CRC | RCR_RXEN;
	unsigned long flags;

	dm9000_dbg(db, 1, "entering %s\n", __func__);

	spin_lock_irqsave(&db->lock, flags);

	/* write the station (MAC) address */
	for (i = 0, oft = DM9000_PAR; i < 6; i++, oft++)
		iow(db, oft, dev->dev_addr[i]);

	/* Clear Hash Table */
	for (i = 0; i < 4; i++)
		hash_table[i] = 0x0;

	/* broadcast address */
	hash_table[3] = 0x8000;

	if (dev->flags & IFF_PROMISC)
		rcr |= RCR_PRMSC;

	if (dev->flags & IFF_ALLMULTI)
		rcr |= RCR_ALL;

	/* the multicast address in Hash Table : 64 bits */
	for (i = 0; i < mc_cnt; i++, mcptr = mcptr->next) {
		/* low 6 bits of the CRC select one of the 64 hash bits */
		hash_val = ether_crc_le(6, mcptr->dmi_addr) & 0x3f;
		hash_table[hash_val / 16] |= (u16) 1 << (hash_val % 16);
	}

	/* Write the hash table to MAC MD table */
	for (i = 0, oft = DM9000_MAR; i < 4; i++) {
		/* two consecutive registers per 16-bit word: value, then value >> 8 */
		iow(db, oft++, hash_table[i]);
		iow(db, oft++, hash_table[i] >> 8);
	}

	iow(db, DM9000_RCR, rcr);
	spin_unlock_irqrestore(&db->lock, flags);
}

/*
 * Initialize dm9000 board
 *
 * Powers up and resets the PHY, programs the operating registers,
 * installs the address filter, enables the interrupt mask and resets
 * the driver's TX bookkeeping.
 *
 * NOTE: this function busy-waits for 2 x 20 ms (mdelay) and takes no
 * lock itself for its direct register accesses.
 */
static void
dm9000_init_dm9000(struct net_device *dev)
{
	board_info_t *db = netdev_priv(dev);
	unsigned int imr;

	dm9000_dbg(db, 1, "entering %s\n", __func__);

	/* I/O mode */
	db->io_mode = ior(db, DM9000_ISR) >> 6;	/* ISR bit7:6 keeps I/O mode */

	/* GPIO0 on pre-activate PHY */
	/* V_R1: the earlier "iow(db, DM9000_GPR, 0)" (REG_1F bit0 activate
	 * phyxcer) at this point was removed; the PHY is enabled once,
	 * after the GPIO direction is set below.
	 */
	iow(db, DM9000_GPCR, GPCR_GEP_CNTL);	/* Let GPIO0 output */
	iow(db, DM9000_GPR, 0);	/* Enable PHY */
        mdelay(20);  /* V_R2: delay 20ms after PHY power on */

	dm9000_phy_write(dev, 0, 0, 0x8000); /* V_R2: reset PHY */
        mdelay (20);


	/* Disabled in this vendor version: selecting the external PHY
	 * (iow(db, DM9000_NCR, NCR_EXT_PHY)) when DM9000_PLATF_EXT_PHY
	 * is set in db->flags.
	 */

	/* Program operating register */
	iow(db, DM9000_TCR, 0);	        /* TX Polling clear */
	iow(db, DM9000_BPTR, 0x3f);	/* Less 3Kb, 200us */
	iow(db, DM9000_FCR, 0xff);	/* Flow Control */
	iow(db, DM9000_SMCR, 0);        /* Special Mode */
	/* clear TX status */
	iow(db, DM9000_NSR, NSR_WAKEST | NSR_TX2END | NSR_TX1END);
	iow(db, DM9000_ISR, ISR_CLR_STATUS); /* Clear interrupt status */

	/* Set address filter table */
	dm9000_hash_table(dev);

	imr = IMR_PAR | IMR_PTM | IMR_PRM;
	/* every type except DM9000E also gets the link-change interrupt */
	if (db->type != TYPE_DM9000E)
		imr |= IMR_LNKCHNG;

	db->imr_all = imr;

	/* Enable TX/RX interrupt mask */
	iow(db, DM9000_IMR, imr);

	/* Init Driver variable */
	db->tx_pkt_cnt = 0;
	db->queue_pkt_len = 0;
	dev->trans_start = 0;
	
	dm9000_phy_write(dev, 0, 27, 0xE100); /* V_R1: write PHY reg 27 = 0xE100 */
}

/*
 * Our watchdog timed out. Called by the networking layer.
 *
 * Stops the queue, resets and re-initialises the chip, then restarts
 * the queue. The chip's address register is saved on entry and
 * restored on exit so an interrupted register access is not disturbed.
 *
 * NOTE(review): dm9000_init_dm9000() busy-waits (2 x mdelay(20)) and is
 * called here with db->lock held and interrupts disabled.
 */
static void dm9000_timeout(struct net_device *dev)
{
	board_info_t *db = netdev_priv(dev);
	u8 reg_save;
	unsigned long flags;

	spin_lock_irqsave(&db->lock, flags);

	/*
	 * Save previous register address. This is done with the lock
	 * held: reading it before taking the lock would let another lock
	 * holder change the address register between our read and the
	 * restore below, so we would restore a stale value.
	 */
	reg_save = readb(db->io_addr);

	netif_stop_queue(dev);
	printk(KERN_INFO "[%s Ethernet Driver, V%s]: Timeout!!\n", CARDNAME, DRV_VERSION);
	dm9000_reset(db);
	dm9000_init_dm9000(dev);
	/* We can accept TX packets again */
	dev->trans_start = jiffies;
	netif_wake_queue(dev);

	/* Restore previous register address */
	writeb(reg_save, db->io_addr);
	spin_unlock_irqrestore(&db->lock, flags);
}

/*
 *  Hardware start transmission.
 *  Send a packet to media from the upper layer.
 */
static int
dm9000_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned long flags;
	board_info_t *db = netdev_priv(dev);

	dm9000_dbg(db, 3, "%s:\n", __func__);

	if (db->tx_pkt_cnt > 1)
		return 1;

	spin_lock_irqsave(&db->lock, flags);

	/* Move data to DM9000 TX RAM */
	writeb(DM9000_MWCMD, db->io_addr);

	(db->outblk)(db->io_data, skb->data, skb->len);
	dev->stats.tx_bytes += skb->len;

	db->tx_pkt_cnt++;
	/* TX control: First packet immediately send, second packet queue */
	if (db->tx_pkt_cnt == 1) {
		/* Set TX length to DM9000 */
		iow(db, DM9000_TXPLL, skb->len);
		iow(db, DM9000_TXPLH, skb->len >> 8);

		/* Issue TX polling command */
		iow(db, DM9000_TCR, TCR_TXREQ);	/* Cleared after TX complete */

		dev->trans_start = jiffies;	/* save the time stamp */
	} else {
		/* Second packet */
		db->queue_pkt_len = skb->len;
		netif_stop_queue(dev);
	}

	spin_unlock_irqrestore(&db->lock, flags);

	/* free this SKB */
	dev_kfree_skb(skb);

	return 0;
}

/*
 * DM9000 interrupt handler
 * receive the packet to upper layer, free the transmitted packet
 */

/*
 * Handle TX completion.
 *
 * Reads NSR; if either TX-end bit is set, accounts one completed
 * packet, starts transmission of the queued packet (if one is still
 * pending in the chip) and wakes the net queue. Does nothing when no
 * TX-end bit is set.
 */
static void dm9000_tx_done(struct net_device *dev, board_info_t *db)
{
	int nsr = ior(db, DM9000_NSR);	/* Got TX status */

	if (!(nsr & (NSR_TX2END | NSR_TX1END)))
		return;

	/* One packet sent complete */
	db->tx_pkt_cnt--;
	dev->stats.tx_packets++;

	if (netif_msg_tx_done(db))
		dev_dbg(db->dev, "tx done, NSR %02x\n", nsr);

	/* Queue packet check & send */
	if (db->tx_pkt_cnt > 0) {
		iow(db, DM9000_TXPLL, db->queue_pkt_len);
		iow(db, DM9000_TXPLH, db->queue_pkt_len >> 8);
		iow(db, DM9000_TCR, TCR_TXREQ);
		dev->trans_start = jiffies;
	}

	netif_wake_queue(dev);
}

/*
 * Four-byte header that dm9000_rx() reads from the chip in front of every
 * received packet.  Packed so that sizeof() matches the bytes read.
 */
struct dm9000_rxhdr {
	u8	RxPktReady;	/* not examined by dm9000_rx(); presumably the ready flag - TODO confirm */
	u8	RxStatus;	/* same bit layout as the RSR register (RSR_* masks) */
	__le16	RxLen;		/* packet length, little endian */
} __attribute__((__packed__));

/*
 *  Received a packet and pass to upper layer
 *
 *  Loops while the chip reports a ready packet: reads the 4-byte header,
 *  validates length and status, then either copies the packet into a new
 *  skb and hands it to netif_rx() or discards its data via db->dumpblk.
 *  Called from dm9000_interrupt() with db->lock held.
 */
static void
dm9000_rx(struct net_device *dev)
{
	board_info_t *db = netdev_priv(dev);
	struct dm9000_rxhdr rxhdr;
	struct sk_buff *skb;
	u8 rxbyte, *rdptr;
	bool GoodPacket;
	int RxLen;

	/* Check packet ready or not */
	do {
		ior(db, DM9000_MRCMDX);	/* Dummy read */

		/* Get most updated data */
		rxbyte = readb(db->io_data);

		/* Status check: this byte must be 0 or 1 */
		if (rxbyte > DM9000_PKT_RDY) {
			dev_warn(db->dev, "status check fail: %d\n", rxbyte);
			iow(db, DM9000_RCR, 0x00);	/* Stop Device */
			/* NOTE(review): an IMR_* mask is written to ISR here - confirm intent */
			iow(db, DM9000_ISR, IMR_PAR);	/* Stop INT request */
			return;
		}

		/* Byte is 0: no (further) packet waiting */
		if (rxbyte != DM9000_PKT_RDY)
			return;

		/* A packet ready now  & Get status/length */
		GoodPacket = true;
		writeb(DM9000_MRCMD, db->io_addr);

		(db->inblk)(db->io_data, &rxhdr, sizeof(rxhdr));

		RxLen = le16_to_cpu(rxhdr.RxLen);

		if (netif_msg_rx_status(db))
			dev_dbg(db->dev, "RX: status %02x, length %04x\n",
				rxhdr.RxStatus, RxLen);

		/* Packet Status check */
		if (RxLen < 0x40) {
			GoodPacket = false;
			if (netif_msg_rx_err(db))
				dev_dbg(db->dev, "RX: Bad Packet (runt)\n");
		}

		/* Oversized length is only logged; the packet is not rejected here */
		if (RxLen > DM9000_PKT_MAX) {
			dev_dbg(db->dev, "RST: RX Len:%x\n", RxLen);
		}

		/* rxhdr.RxStatus is identical to RSR register. */
		if (rxhdr.RxStatus & (RSR_FOE | RSR_CE | RSR_AE |
				      RSR_PLE | RSR_RWTO |
				      RSR_LCS | RSR_RF)) {
			GoodPacket = false;
			if (rxhdr.RxStatus & RSR_FOE) {
				if (netif_msg_rx_err(db))
					dev_dbg(db->dev, "fifo error\n");
				dev->stats.rx_fifo_errors++;
				printk(KERN_INFO "[%s Ethernet Driver, V%s]: FIFO Over Flow!!\n", CARDNAME, DRV_VERSION); //JJ1
			}
			if (rxhdr.RxStatus & RSR_CE) {
				if (netif_msg_rx_err(db))
					dev_dbg(db->dev, "crc error\n");
				dev->stats.rx_crc_errors++;
			}
			if (rxhdr.RxStatus & RSR_RF) {
				if (netif_msg_rx_err(db))
					dev_dbg(db->dev, "length error\n");
				dev->stats.rx_length_errors++;
			}
		}

		/* Move data from DM9000 */
		if (GoodPacket
		    && ((skb = dev_alloc_skb(RxLen + 4)) != NULL)) {
			/* 2 bytes of headroom - presumably to align the IP header */
			skb_reserve(skb, 2);
			/*
			 * skb length excludes the last 4 bytes (presumably the
			 * frame CRC), although all RxLen bytes are read below.
			 */
			rdptr = (u8 *) skb_put(skb, RxLen - 4);

			/* Read received packet from RX SRAM */

			(db->inblk)(db->io_data, rdptr, RxLen);
			dev->stats.rx_bytes += RxLen;

			/* Pass to upper layer */
			skb->protocol = eth_type_trans(skb, dev);
			netif_rx(skb);
			dev->stats.rx_packets++;

		} else {
			/* need to dump the packet's data */
			/* (also taken when the skb allocation failed) */

			(db->dumpblk)(db->io_data, RxLen);
		}
	} while (rxbyte == DM9000_PKT_RDY);
}

/*
 * DM9000 interrupt handler.
 * Masks the chip's interrupts, reads and acknowledges the interrupt status,
 * dispatches RX / TX-done / link-change work, then restores the interrupt
 * mask and the register index that was selected when the IRQ arrived.
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t dm9000_interrupt(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	board_info_t *db = netdev_priv(dev);
	int int_status;
	unsigned long flags;
	u8 reg_save;

	dm9000_dbg(db, 3, "entering %s\n", __func__);

	/* A real interrupt coming */

	/* holders of db->lock must always block IRQs */
	spin_lock_irqsave(&db->lock, flags);

	/* Save previous register address */
	reg_save = readb(db->io_addr);

	/* Disable all interrupts */
	iow(db, DM9000_IMR, IMR_PAR);

	/* Got DM9000 interrupt status */
	int_status = ior(db, DM9000_ISR);	/* Got ISR */
	iow(db, DM9000_ISR, int_status);	/* Clear ISR status */

	if (netif_msg_intr(db))
		dev_dbg(db->dev, "interrupt status %02x\n", int_status);

	/* Received the coming packet */
	if (int_status & ISR_PRS)
		dm9000_rx(dev);

	/* Transmit Interrupt check */
	if (int_status & ISR_PTS)
		dm9000_tx_done(dev, db);

	/* Link-change status is only acted on for chips not typed DM9000E */
	if (db->type != TYPE_DM9000E) {
		if (int_status & ISR_LNKCHNG) {
			/* fire a link-change request */
			schedule_delayed_work(&db->phy_poll, 1);
		}
	}

	/* Re-enable interrupt mask */
	iow(db, DM9000_IMR, db->imr_all);

	/* Restore previous register address */
	writeb(reg_save, db->io_addr);

	 spin_unlock_irqrestore(&db->lock, flags);

	return IRQ_HANDLED;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Used by netconsole
 *
 * Runs the interrupt handler by hand with the device IRQ line disabled,
 * so pending chip events get serviced without a real interrupt.
 */
static void dm9000_poll_controller(struct net_device *dev)
{
	disable_irq(dev->irq);
	dm9000_interrupt(dev->irq, dev);
	enable_irq(dev->irq);
}
#endif

/*
 *  Open the interface.
 *  Invoked when the interface is activated (e.g. by "ifconfig"): claims the
 *  IRQ, resets and initialises the chip, checks the media and starts the TX
 *  queue.  Returns 0, or -EAGAIN if the IRQ cannot be obtained.
 */
static int
dm9000_open(struct net_device *dev)
{
	board_info_t *db = netdev_priv(dev);
	unsigned long trigger = db->irq_res->flags & IRQF_TRIGGER_MASK;
	int irq_err;

	if (netif_msg_ifup(db))
		dev_dbg(db->dev, "enabling %s\n", dev->name);

	/*
	 * The IRQ resource carries no trigger type: carry on regardless,
	 * but tell the user that this is a problem.
	 */
	if (trigger == IRQF_TRIGGER_NONE)
		dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");

	irq_err = request_irq(dev->irq, &dm9000_interrupt,
			      trigger | IRQF_SHARED, dev->name, dev);
	if (irq_err)
		return -EAGAIN;

	/* Bring the chip into a known, running state */
	dm9000_reset(db);
	dm9000_init_dm9000(dev);

	/* Driver bookkeeping starts from scratch */
	db->dbug_cnt = 0;

	mii_check_media(&db->mii, netif_msg_link(db), 1);
	netif_start_queue(dev);

	dm9000_schedule_poll(db);

	return 0;
}

/*
 * Wait for @ms milliseconds.  While the device is flagged as suspending
 * (db->in_suspend) the wait is a busy mdelay(); otherwise it is a
 * sleeping msleep().
 */
static void dm9000_msleep(board_info_t *db, unsigned int ms)
{
	if (!db->in_suspend) {
		msleep(ms);
		return;
	}

	mdelay(ms);
}

/*
 *   Read a word from phyxcer
 *
 *   @dev:            net device owning the chip
 *   @phy_reg_unused: ignored
 *   @reg:            PHY register number, OR-ed with DM9000_PHY into EPAR
 *
 *   Issues the read command, waits 1 ms with the spinlock dropped, then
 *   collects the result.  db->addr_lock is held for the whole sequence.
 *   Returns (EPDRH << 8) | EPDRL.
 */
static int
dm9000_phy_read(struct net_device *dev, int phy_reg_unused, int reg)
{
	board_info_t *db = netdev_priv(dev);
	unsigned long flags;
	unsigned int reg_save;
	int ret;

	mutex_lock(&db->addr_lock);

	spin_lock_irqsave(&db->lock,flags);

	/* Save previous register address */
	reg_save = readb(db->io_addr);

	/* Fill the phyxcer register into REG_0C */
	iow(db, DM9000_EPAR, DM9000_PHY | reg);

	iow(db, DM9000_EPCR, EPCR_ERPRR | EPCR_EPOS);	/* Issue phyxcer read command */

	/* Put the register index back before releasing the lock for the wait */
	writeb(reg_save, db->io_addr);
	spin_unlock_irqrestore(&db->lock,flags);

	dm9000_msleep(db, 1);		/* Wait read complete */

	spin_lock_irqsave(&db->lock,flags);
	reg_save = readb(db->io_addr);

	iow(db, DM9000_EPCR, 0x0);	/* Clear phyxcer read command */

	/* The read data keeps on REG_0D & REG_0E */
	ret = (ior(db, DM9000_EPDRH) << 8) | ior(db, DM9000_EPDRL);

	/* restore the previous address */
	writeb(reg_save, db->io_addr);
	spin_unlock_irqrestore(&db->lock,flags);

	mutex_unlock(&db->addr_lock);

	dm9000_dbg(db, 5, "phy_read[%02x] -> %04x\n", reg, ret);
	return ret;
}

/*
 *   Write a word to phyxcer
 *
 *   @dev:            net device owning the chip
 *   @phyaddr_unused: ignored
 *   @reg:            PHY register number, OR-ed with DM9000_PHY into EPAR
 *   @value:          value to write; low byte goes to EPDRL, next byte to EPDRH
 *
 *   Issues the write command, waits 1 ms with the spinlock dropped, then
 *   clears the command.  db->addr_lock is held for the whole sequence.
 */
static void
dm9000_phy_write(struct net_device *dev,
		 int phyaddr_unused, int reg, int value)
{
	board_info_t *db = netdev_priv(dev);
	unsigned long flags;
	unsigned long reg_save;

	dm9000_dbg(db, 5, "phy_write[%02x] = %04x\n", reg, value);
	mutex_lock(&db->addr_lock);

	spin_lock_irqsave(&db->lock,flags);

	/* Save previous register address */
	reg_save = readb(db->io_addr);

	/* Fill the phyxcer register into REG_0C */
	iow(db, DM9000_EPAR, DM9000_PHY | reg);

	/* Fill the written data into REG_0D & REG_0E */
	iow(db, DM9000_EPDRL, value);
	iow(db, DM9000_EPDRH, value >> 8);

	iow(db, DM9000_EPCR, EPCR_EPOS | EPCR_ERPRW);	/* Issue phyxcer write command */

	/* Put the register index back before releasing the lock for the wait */
	writeb(reg_save, db->io_addr);
	spin_unlock_irqrestore(&db->lock, flags);

	dm9000_msleep(db, 1);		/* Wait write complete */

	spin_lock_irqsave(&db->lock,flags);
	reg_save = readb(db->io_addr);

	iow(db, DM9000_EPCR, 0x0);	/* Clear phyxcer write command */

	/* restore the previous address */
	writeb(reg_save, db->io_addr);

	spin_unlock_irqrestore(&db->lock, flags);
	mutex_unlock(&db->addr_lock);
}

/*
 * Quiesce the chip: reset the PHY, mask all chip interrupts and disable
 * the receiver.  Used by dm9000_stop() and dm9000_drv_suspend().
 */
static void
dm9000_shutdown(struct net_device *dev)
{
	board_info_t *db = netdev_priv(dev);

	/* RESET device */
	dm9000_phy_write(dev, 0, MII_BMCR, BMCR_RESET);	/* PHY RESET */
	/* V_R1: the PHY power-down write below was disabled in this version */
//V_R1	iow(db, DM9000_GPR, 0x01);	/* Power-Down PHY */
	iow(db, DM9000_IMR, IMR_PAR);	/* Disable all interrupt */
	iow(db, DM9000_RCR, 0x00);	/* Disable RX */
}

/*
 * Stop the interface.
 * The interface is stopped when it is brought down.
 *
 * Cancels the PHY poll work, stops the TX queue, drops the carrier,
 * releases the IRQ and finally quiesces the chip.  Always returns 0.
 */
static int
dm9000_stop(struct net_device *ndev)
{
	board_info_t *db = netdev_priv(ndev);

	if (netif_msg_ifdown(db))
		dev_dbg(db->dev, "shutting down %s\n", ndev->name);

	/* Wait for any running poll work before tearing the device down */
	cancel_delayed_work_sync(&db->phy_poll);

	netif_stop_queue(ndev);
	netif_carrier_off(ndev);

	/* free interrupt */
	free_irq(ndev->irq, ndev);

	dm9000_shutdown(ndev);

	return 0;
}

/* Size in bytes of a resource whose ->start and ->end are both inclusive */
#define res_size(_r) (((_r)->end - (_r)->start) + 1)

/* net_device callbacks; generic Ethernet helpers cover MTU and MAC changes */
static const struct net_device_ops dm9000_netdev_ops = {
	.ndo_open		= dm9000_open,
	.ndo_stop		= dm9000_stop,
	.ndo_start_xmit		= dm9000_start_xmit,
	.ndo_tx_timeout		= dm9000_timeout,
	.ndo_set_multicast_list	= dm9000_hash_table,
	.ndo_do_ioctl		= dm9000_ioctl,
	.ndo_change_mtu		= eth_change_mtu,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= eth_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= dm9000_poll_controller,
#endif
};


/*
 * Search DM9000 board, allocate space and register it
 *
 * Maps the address and data register windows named by the platform
 * resources, selects the I/O accessors, verifies the chip ID, chooses a MAC
 * address (EEPROM, then platform data, then the chip's address registers,
 * then a built-in fallback) and registers the net device.
 *
 * Returns 0 on success or a negative errno.  On every failure, including a
 * failed register_netdev(), the board resources and the net device are
 * released again.
 */
static int __devinit
dm9000_probe(struct platform_device *pdev)
{
	/* Fallback MAC, used when the chip's address registers read all-ones */
	static const unsigned char mac_addr[6] = {0x00,0x11,0x22,0x33,0x44,0x55};
	static const unsigned char mac_tmp[6] = {0xff,0xff,0xff,0xff,0xff,0xff};
	struct dm9000_plat_data *pdata = pdev->dev.platform_data;
	struct board_info *db;	/* Point a board information structure */
	struct net_device *ndev;
	const unsigned char *mac_src;
	int ret = 0;
	int iosize;
	int i;
	u32 id_val;

	/* Init network device */
	ndev = alloc_etherdev(sizeof(struct board_info));
	if (!ndev) {
		dev_err(&pdev->dev, "could not allocate device.\n");
		return -ENOMEM;
	}

	SET_NETDEV_DEV(ndev, &pdev->dev);

	dev_dbg(&pdev->dev, "dm9000_probe()\n");
	ndev->netdev_ops = &dm9000_netdev_ops;

	/* setup board info structure */
	db = netdev_priv(ndev);
	memset(db, 0, sizeof(*db));

	db->dev = &pdev->dev;
	db->ndev = ndev;

	spin_lock_init(&db->lock);
	mutex_init(&db->addr_lock);

	INIT_DELAYED_WORK(&db->phy_poll, dm9000_poll_work);

	db->addr_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	db->data_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	db->irq_res  = platform_get_resource(pdev, IORESOURCE_IRQ, 0);

	if (db->addr_res == NULL || db->data_res == NULL ||
	    db->irq_res == NULL) {
		dev_err(db->dev, "insufficient resources\n");
		ret = -ENOENT;
		goto out;
	}

	/* Claim and map the address (index) register window */
	iosize = res_size(db->addr_res);
	db->addr_req = request_mem_region(db->addr_res->start, iosize,
					  pdev->name);

	if (db->addr_req == NULL) {
		dev_err(db->dev, "cannot claim address reg area\n");
		ret = -EIO;
		goto out;
	}

	db->io_addr = ioremap(db->addr_res->start, iosize);

	if (db->io_addr == NULL) {
		dev_err(db->dev, "failed to ioremap address reg\n");
		ret = -EINVAL;
		goto out;
	}

	/* Claim and map the data register window */
	iosize = res_size(db->data_res);
	db->data_req = request_mem_region(db->data_res->start, iosize,
					  pdev->name);

	if (db->data_req == NULL) {
		dev_err(db->dev, "cannot claim data reg area\n");
		ret = -EIO;
		goto out;
	}

	db->io_data = ioremap(db->data_res->start, iosize);

	if (db->io_data == NULL) {
		dev_err(db->dev, "failed to ioremap data reg\n");
		ret = -EINVAL;
		goto out;
	}

	/* fill in parameters for net-dev structure */
	ndev->base_addr = (unsigned long)db->io_addr;
	ndev->irq	= db->irq_res->start;
	/* Stone add */
	printk(KERN_INFO "[dm9] %s ndev->irq=%x \n", __func__, ndev->irq);

	/* ensure at least we have a default set of IO routines
	 * (iosize is the size of the data window at this point) */
	dm9000_set_io(db, iosize);

	/* check to see if anything is being over-ridden */
	if (pdata != NULL) {
		/* check to see if the driver wants to over-ride the
		 * default IO width */

		if (pdata->flags & DM9000_PLATF_8BITONLY)
			dm9000_set_io(db, 1);

		if (pdata->flags & DM9000_PLATF_16BITONLY)
			dm9000_set_io(db, 2);

		if (pdata->flags & DM9000_PLATF_32BITONLY)
			dm9000_set_io(db, 4);

		/* check to see if there are any IO routine
		 * over-rides */

		if (pdata->inblk != NULL)
			db->inblk = pdata->inblk;

		if (pdata->outblk != NULL)
			db->outblk = pdata->outblk;

		if (pdata->dumpblk != NULL)
			db->dumpblk = pdata->dumpblk;

		db->flags = pdata->flags;
	}

#ifdef CONFIG_DM9000_FORCE_SIMPLE_PHY_POLL
	db->flags |= DM9000_PLATF_SIMPLE_PHY;
#endif

	/* Stone add: direct NCR write (NCR_RST plus bit 1) in place of
	 * the dm9000_reset(db) call */
	iow(db, DM9000_NCR, 0x03);

	/* try multiple times, DM9000 sometimes gets the read wrong */
	for (i = 0; i < 8; i++) {
		id_val  = ior(db, DM9000_VIDL);
		id_val |= (u32)ior(db, DM9000_VIDH) << 8;
		id_val |= (u32)ior(db, DM9000_PIDL) << 16;
		id_val |= (u32)ior(db, DM9000_PIDH) << 24;

		printk(KERN_INFO "[dm9].%d read id 0x%08x\n", i+1, id_val);

		if (id_val == DM9000_ID)
			break;
		dev_err(db->dev, "read wrong id 0x%08x\n", id_val);
	}

	/* JJ1: report kernel version (major.minor.patch) and chip ID */
	printk(KERN_INFO "[%s Ethernet Driver, V%s]: KV= %d.%d.%d !!\n",
	       CARDNAME, DRV_VERSION,
	       (LINUX_VERSION_CODE>>16 & 0xff),
	       (LINUX_VERSION_CODE>>8 & 0xff),
	       (LINUX_VERSION_CODE & 0xff));
	printk(KERN_INFO "[%s Ethernet Driver, V%s]: ChipID= 0x%x !!\n",
	       CARDNAME, DRV_VERSION, id_val);

	if (id_val != DM9000_ID) {
		dev_err(db->dev, "wrong id: 0x%08x\n", id_val);
		ret = -ENODEV;
		goto out;
	}

	/* Identify what type of DM9000 we are working on */

	id_val = ior(db, DM9000_CHIPR);
	dev_dbg(db->dev, "dm9000 revision 0x%02x\n", id_val);
	printk(KERN_INFO "[DM9000]dm9000 revision 0x%02x\n", id_val); /* V_R1 */

	switch (id_val) {
	case CHIPR_DM9000A:
		db->type = TYPE_DM9000A;
		break;
	case CHIPR_DM9000B:
		db->type = TYPE_DM9000B;
		break;
	default:
		dev_dbg(db->dev, "ID %02x => defaulting to DM9000E\n", id_val);
		db->type = TYPE_DM9000E;
	}

	/* from this point we assume that we have found a DM9000 */

	/* driver system function */
	ether_setup(ndev);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
	/* new kernel 2.6.31: callbacks (poll controller included) come
	 * from dm9000_netdev_ops */
	ndev->netdev_ops	 = &dm9000_netdev_ops;
	ndev->watchdog_timeo	 = msecs_to_jiffies(watchdog);
	ndev->ethtool_ops	 = &dm9000_ethtool_ops;
#else
	ndev->open		 = &dm9000_open;
	ndev->hard_start_xmit    = &dm9000_start_xmit;
	ndev->tx_timeout         = &dm9000_timeout;
	ndev->watchdog_timeo = msecs_to_jiffies(watchdog);
	ndev->stop		 = &dm9000_stop;
	ndev->set_multicast_list = &dm9000_hash_table;
	ndev->ethtool_ops	 = &dm9000_ethtool_ops;
	ndev->do_ioctl		 = &dm9000_ioctl;
#ifdef CONFIG_NET_POLL_CONTROLLER
	/* only the legacy per-device callback layout has this field */
	ndev->poll_controller	 = &dm9000_poll_controller;
#endif
#endif

	db->msg_enable       = NETIF_MSG_LINK;
	db->mii.phy_id_mask  = 0x1f;
	db->mii.reg_num_mask = 0x1f;
	db->mii.force_media  = 0;
	db->mii.full_duplex  = 0;
	db->mii.dev	     = ndev;
	db->mii.mdio_read    = dm9000_phy_read;
	db->mii.mdio_write   = dm9000_phy_write;

	mac_src = "eeprom";

	/* try reading the node address from the attached EEPROM */
	for (i = 0; i < 6; i += 2)
		dm9000_read_eeprom(db, i / 2, ndev->dev_addr+i);

	if (!is_valid_ether_addr(ndev->dev_addr) && pdata != NULL) {
		mac_src = "platform data";
		memcpy(ndev->dev_addr, pdata->dev_addr, 6);
	}

	if (!is_valid_ether_addr(ndev->dev_addr)) {
		/* try reading from mac */

		mac_src = "chip";

		for (i = 0; i < 6; i++)
			ndev->dev_addr[i] = ior(db, i+DM9000_PAR);

		/* Mark Chang 20100521: all-ones means nothing was programmed,
		 * fall back to the built-in address */
		if (!memcmp(ndev->dev_addr, mac_tmp, 6))
			memcpy(ndev->dev_addr, mac_addr, 6);
	}

	if (!is_valid_ether_addr(ndev->dev_addr))
		dev_warn(db->dev, "%s: Invalid ethernet MAC address. Please "
			 "set using ifconfig\n", ndev->name);

	platform_set_drvdata(pdev, ndev);
	ret = register_netdev(ndev);
	if (ret) {
		/* Do not report success for a device that never registered:
		 * undo the drvdata link and release everything below */
		dev_err(db->dev, "failed to register net device\n");
		platform_set_drvdata(pdev, NULL);
		goto out;
	}

	printk(KERN_INFO "%s: dm9000%c at %p,%p IRQ %d MAC: %pM (%s)\n",
	       ndev->name, dm9000_type_to_char(db->type),
	       db->io_addr, db->io_data, ndev->irq,
	       ndev->dev_addr, mac_src);
	return 0;

out:
	dev_err(db->dev, "not found (%d).\n", ret);

	dm9000_release_board(pdev, db);
	free_netdev(ndev);

	return ret;
}

/*
 * Platform suspend hook.  Flags the device as suspending and, if the
 * interface is running, detaches it from the stack and quiesces the chip.
 * Always returns 0.
 */
static int
dm9000_drv_suspend(struct platform_device *dev, pm_message_t state)
{
	struct net_device *ndev = platform_get_drvdata(dev);
	board_info_t *db;

	/* No net device bound to this platform device */
	if (!ndev)
		return 0;

	db = netdev_priv(ndev);
	db->in_suspend = 1;

	/* An interface that is down needs no further work */
	if (!netif_running(ndev))
		return 0;

	netif_device_detach(ndev);
	dm9000_shutdown(ndev);

	return 0;
}

/*
 * Platform resume hook.  If the interface was running, resets and
 * re-initialises the chip and re-attaches the device; then clears the
 * in_suspend flag.  Always returns 0.
 */
static int
dm9000_drv_resume(struct platform_device *dev)
{
	struct net_device *ndev = platform_get_drvdata(dev);
	board_info_t *db;

	if (ndev) {
		/* Only derive the private area once ndev is known to be valid */
		db = netdev_priv(ndev);

		if (netif_running(ndev)) {
			dm9000_reset(db);
			dm9000_init_dm9000(ndev);

			netif_device_attach(ndev);
		}

		db->in_suspend = 0;
	}
	return 0;
}

/*
 * Platform remove hook: unregisters the net device, releases the board
 * resources and frees the device structure.  Always returns 0.
 */
static int __devexit
dm9000_drv_remove(struct platform_device *pdev)
{
	struct net_device *ndev = platform_get_drvdata(pdev);
	board_info_t *db = netdev_priv(ndev);

	/* Break the platform-device link first */
	platform_set_drvdata(pdev, NULL);

	unregister_netdev(ndev);
	dm9000_release_board(pdev, db);
	/* db lives inside ndev, so this must come last */
	free_netdev(ndev);

	dev_dbg(&pdev->dev, "released and freed device\n");
	return 0;
}

/* Platform driver glue: matched by name against "dm9000" platform devices */
static struct platform_driver dm9000_driver = {
	.driver	= {
		.name    = "dm9000",
		.owner	 = THIS_MODULE,
	},
	.probe   = dm9000_probe,
	.remove  = __devexit_p(dm9000_drv_remove),
	.suspend = dm9000_drv_suspend,
	.resume  = dm9000_drv_resume,
};

/* Module entry: print the driver banner and register the platform driver */
static int __init
dm9000_init(void)
{
	printk(KERN_INFO "%s Ethernet Driver, V%s\n", CARDNAME, DRV_VERSION);

	return platform_driver_register(&dm9000_driver);
}

/* Module exit: unregister the platform driver */
static void __exit
dm9000_cleanup(void)
{
	platform_driver_unregister(&dm9000_driver);
}

/* Module entry/exit points and metadata */
module_init(dm9000_init);
module_exit(dm9000_cleanup);

MODULE_AUTHOR("Sascha Hauer, Ben Dooks");
MODULE_DESCRIPTION("Davicom DM9000 network driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:dm9000");

[-- Attachment #3: dm9000_KT2.6.31.h --]
[-- Type: text/plain, Size: 4180 bytes --]

/*
 * dm9000 Ethernet
 */

#ifndef _DM9000X_H_
#define _DM9000X_H_

/* Value the probe expects from PIDH:PIDL:VIDH:VIDL (PIDH in the top byte) */
#define DM9000_ID		0x90000A46

/* although the registers are 16 bit, they are 32-bit aligned.
 */

/* Register indexes, as passed to the driver's ior()/iow() accessors */
#define DM9000_NCR             0x00
#define DM9000_NSR             0x01	/* TX completion status, see NSR_* */
#define DM9000_TCR             0x02	/* TX request, see TCR_* */
#define DM9000_TSR1            0x03
#define DM9000_TSR2            0x04
#define DM9000_RCR             0x05	/* writing 0x00 disables RX */
#define DM9000_RSR             0x06	/* same bits as the RX header status byte */
#define DM9000_ROCR            0x07
#define DM9000_BPTR            0x08
#define DM9000_FCTR            0x09
#define DM9000_FCR             0x0A
#define DM9000_EPCR            0x0B	/* PHY access command, see EPCR_* */
#define DM9000_EPAR            0x0C	/* PHY register address */
#define DM9000_EPDRL           0x0D	/* PHY data, low byte */
#define DM9000_EPDRH           0x0E	/* PHY data, high byte */
#define DM9000_WCR             0x0F

#define DM9000_PAR             0x10	/* first of 6 MAC address bytes */
#define DM9000_MAR             0x16

#define DM9000_GPCR	       0x1e
#define DM9000_GPR             0x1f
#define DM9000_TRPAL           0x22
#define DM9000_TRPAH           0x23
#define DM9000_RWPAL           0x24
#define DM9000_RWPAH           0x25

/* ID bytes, combined by the probe and compared with DM9000_ID */
#define DM9000_VIDL            0x28
#define DM9000_VIDH            0x29
#define DM9000_PIDL            0x2A
#define DM9000_PIDH            0x2B

#define DM9000_CHIPR           0x2C	/* chip revision, see CHIPR_* */
#define DM9000_SMCR            0x2F

/* DM9000_CHIPR values that select the A / B chip type */
#define CHIPR_DM9000A	       0x19
#define CHIPR_DM9000B	       0x1A  //V_R1 0x1B

/* Packet memory access and interrupt registers */
#define DM9000_MRCMDX          0xF0	/* used for the dummy read in the RX path */
#define DM9000_MRCMD           0xF2	/* selected before reading packet data */
#define DM9000_MRRL            0xF4
#define DM9000_MRRH            0xF5
#define DM9000_MWCMDX          0xF6
#define DM9000_MWCMD           0xF8	/* selected before writing packet data */
#define DM9000_MWRL            0xFA
#define DM9000_MWRH            0xFB
#define DM9000_TXPLL           0xFC	/* TX length, low byte */
#define DM9000_TXPLH           0xFD	/* TX length, high byte */
#define DM9000_ISR             0xFE	/* interrupt status; written back to clear */
#define DM9000_IMR             0xFF	/* interrupt mask */

/* NCR_*: by prefix, presumably bits of DM9000_NCR - TODO confirm */
#define NCR_EXT_PHY         (1<<7)
#define NCR_WAKEEN          (1<<6)
#define NCR_FCOL            (1<<4)
#define NCR_FDX             (1<<3)
#define NCR_LBK             (3<<1)
#define NCR_RST	            (1<<0)

/* Bits of DM9000_NSR; TX1END/TX2END are tested by dm9000_tx_done() */
#define NSR_SPEED           (1<<7)
#define NSR_LINKST          (1<<6)
#define NSR_WAKEST          (1<<5)
#define NSR_TX2END          (1<<3)
#define NSR_TX1END          (1<<2)
#define NSR_RXOV            (1<<1)

/* Bits of DM9000_TCR; TCR_TXREQ starts a transmission */
#define TCR_TJDIS           (1<<6)
#define TCR_EXCECM          (1<<5)
#define TCR_PAD_DIS2        (1<<4)
#define TCR_CRC_DIS2        (1<<3)
#define TCR_PAD_DIS1        (1<<2)
#define TCR_CRC_DIS1        (1<<1)
#define TCR_TXREQ           (1<<0)

/* TSR_*: by prefix, presumably bits of DM9000_TSR1/TSR2 - TODO confirm */
#define TSR_TJTO            (1<<7)
#define TSR_LC              (1<<6)
#define TSR_NC              (1<<5)
#define TSR_LCOL            (1<<4)
#define TSR_COL             (1<<3)
#define TSR_EC              (1<<2)

/* RCR_*: by prefix, presumably bits of DM9000_RCR - TODO confirm */
#define RCR_WTDIS           (1<<6)
#define RCR_DIS_LONG        (1<<5)
#define RCR_DIS_CRC         (1<<4)
#define RCR_ALL	            (1<<3)
#define RCR_RUNT            (1<<2)
#define RCR_PRMSC           (1<<1)
#define RCR_RXEN            (1<<0)

/*
 * RX status bits, checked against the status byte of the RX packet header.
 * dm9000_rx() counts FOE as a fifo error, CE as a crc error and RF as a
 * length error; any of FOE/CE/AE/PLE/RWTO/LCS/RF marks the packet bad.
 */
#define RSR_RF              (1<<7)
#define RSR_MF              (1<<6)
#define RSR_LCS             (1<<5)
#define RSR_RWTO            (1<<4)
#define RSR_PLE             (1<<3)
#define RSR_AE              (1<<2)
#define RSR_CE              (1<<1)
#define RSR_FOE             (1<<0)

/*
 * Build DM9000_FCTR fields: FCTR_HWOT() places the low 4 bits of @ot in the
 * upper nibble, FCTR_LWOT() in the lower nibble.  The argument is fully
 * parenthesised so that expressions such as (a | b) are masked as a whole.
 */
#define FCTR_HWOT(ot)	(((ot) & 0xf) << 4)
#define FCTR_LWOT(ot)	((ot) & 0xf)

/* Bits written to DM9000_IMR; the driver writes IMR_PAR alone to mask all chip interrupts */
#define IMR_PAR             (1<<7)
#define IMR_ROOM            (1<<3)
#define IMR_ROM             (1<<2)
#define IMR_PTM             (1<<1)
#define IMR_PRM             (1<<0)

/* Bits of DM9000_ISR: PRS triggers the RX path, PTS the TX-done path */
#define ISR_ROOS            (1<<3)
#define ISR_ROS             (1<<2)
#define ISR_PTS             (1<<1)
#define ISR_PRS             (1<<0)
#define ISR_CLR_STATUS      (ISR_ROOS | ISR_ROS | ISR_PTS | ISR_PRS)

/*
 * Bits of DM9000_EPCR.  PHY reads are issued with EPOS | ERPRR, PHY writes
 * with EPOS | ERPRW; writing 0 clears the command.
 */
#define EPCR_REEP           (1<<5)
#define EPCR_WEP            (1<<4)
#define EPCR_EPOS           (1<<3)
#define EPCR_ERPRR          (1<<2)
#define EPCR_ERPRW          (1<<1)
#define EPCR_ERRE           (1<<0)

/* GPCR_*: by prefix, presumably a bit of DM9000_GPCR - TODO confirm */
#define GPCR_GEP_CNTL       (1<<0)

#define DM9000_PKT_RDY		0x01	/* Packet ready to receive */
#define DM9000_PKT_MAX		1536	/* Received packet max size */

/* DM9000A / DM9000B definitions */

#define IMR_LNKCHNG		(1<<5)
#define IMR_UNDERRUN		(1<<4)

/* ISR_LNKCHNG schedules the PHY poll work on chips not typed DM9000E */
#define ISR_LNKCHNG		(1<<5)
#define ISR_UNDERRUN		(1<<4)

#endif /* _DM9000X_H_ */


^ permalink raw reply

* [PATCH v5 02/10] ipc: "use key as id" functionality for resource get system call introduced
From: Stanislav Kinsbursky @ 2012-09-19 16:05 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

This patch introduces a new IPC resource get request flag, IPC_PRESET, which
should be interpreted as a request to try to allocate an IPC slot with a number
starting from the value represented by key. IOW, the kernel will try to
allocate the new segment in the specified slot.

Note: if the desired slot is not empty, then the next free slot will be used.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
 include/linux/ipc.h |    1 +
 ipc/msg.c           |    4 +++-
 ipc/sem.c           |    4 +++-
 ipc/shm.c           |    4 +++-
 ipc/util.c          |   18 +++++++++++++++---
 ipc/util.h          |    3 ++-
 6 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 30e8161..d7e5632 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -24,6 +24,7 @@ struct ipc_perm
 #define IPC_CREAT  00001000   /* create if key is nonexistent */
 #define IPC_EXCL   00002000   /* fail if key exists */
 #define IPC_NOWAIT 00004000   /* return error on wait */
+#define IPC_PRESET 00040000   /* use key as id */
 
 /* these fields are used by the DIPC package so the kernel as standard
    should avoid using them if possible */
diff --git a/ipc/msg.c b/ipc/msg.c
index f3bfbb8..1cecaf2 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -190,6 +190,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 
 	msq->q_perm.mode = msgflg & S_IRWXUGO;
 	msq->q_perm.key = key;
+	msq->q_perm.id = (msgflg & IPC_PRESET) ? key : 0;
 
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
@@ -201,7 +202,8 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	/*
 	 * ipc_addid() locks msq
 	 */
-	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
+	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni,
+		       msgflg & IPC_PRESET);
 	if (id < 0) {
 		security_msg_queue_free(msq);
 		ipc_rcu_putref(msq);
diff --git a/ipc/sem.c b/ipc/sem.c
index 5215a81..e89b90c 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -306,6 +306,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 
 	sma->sem_perm.mode = (semflg & S_IRWXUGO);
 	sma->sem_perm.key = key;
+	sma->sem_perm.id = (semflg & IPC_PRESET) ? key : 0;
 
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
@@ -314,7 +315,8 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 		return retval;
 	}
 
-	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
+	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni,
+		       semflg & IPC_PRESET);
 	if (id < 0) {
 		security_sem_free(sma);
 		ipc_rcu_putref(sma);
diff --git a/ipc/shm.c b/ipc/shm.c
index 00faa05..0088418 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -480,6 +480,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 
 	shp->shm_perm.key = key;
 	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
+	shp->shm_perm.id = (shmflg & IPC_PRESET) ? key : 0;
 	shp->mlock_user = NULL;
 
 	shp->shm_perm.security = NULL;
@@ -510,7 +511,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	if (IS_ERR(file))
 		goto no_file;
 
-	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
+	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni,
+		       shmflg & IPC_PRESET);
 	if (id < 0) {
 		error = id;
 		goto no_id;
diff --git a/ipc/util.c b/ipc/util.c
index eb07fd3..328abd1 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -238,16 +238,22 @@ int ipc_get_maxid(struct ipc_ids *ids)
  *	@ids: IPC identifier set
  *	@new: new IPC permission set
  *	@size: limit for the number of used ids
+ *	@preset: use passed new->id value as desired id
  *
  *	Add an entry 'new' to the IPC ids idr. The permissions object is
  *	initialised and the first free entry is set up and the id assigned
  *	is returned. The 'new' entry is returned in a locked state on success.
  *	On failure the entry is not locked and a negative err-code is returned.
  *
+ *	If 'preset' is set, then passed new->id is desired to be set for new
+ *	segment. And allocated id is equal to passed value, then ipc ids will
+ *	left unchanged and new->seq will be updated to correspond specified id value.
+ *
  *	Called with ipc_ids.rw_mutex held as a writer.
  */
  
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size,
+	      int preset)
 {
 	uid_t euid;
 	gid_t egid;
@@ -264,7 +270,8 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	rcu_read_lock();
 	spin_lock(&new->lock);
 
-	err = idr_get_new(&ids->ipcs_idr, new, &id);
+	err = idr_get_new_above(&ids->ipcs_idr, new,
+				ipcid_to_idx(new->id), &id);
 	if (err) {
 		spin_unlock(&new->lock);
 		rcu_read_unlock();
@@ -277,6 +284,11 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	new->cuid = new->uid = euid;
 	new->gid = new->cgid = egid;
 
+	if (preset && ipcid_to_idx(new->id) == id) {
+		new->seq = ipcid_to_seq(new->id);
+		return id;
+	}
+
 	new->seq = ids->seq++;
 	if(ids->seq > ids->seq_max)
 		ids->seq = 0;
@@ -736,7 +748,7 @@ struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
 int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
 			struct ipc_ops *ops, struct ipc_params *params)
 {
-	if (params->key == IPC_PRIVATE)
+	if (params->key == IPC_PRIVATE && ((params->flg & IPC_PRESET) == 0))
 		return ipcget_new(ns, ids, ops, params);
 	else
 		return ipcget_public(ns, ids, ops, params);
diff --git a/ipc/util.h b/ipc/util.h
index 850ef3e..878df18 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -92,9 +92,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 #define IPC_SHM_IDS	2
 
 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
+#define ipcid_to_seq(id) ((id) / SEQ_MULTIPLIER)
 
 /* must be called with ids->rw_mutex acquired for writing */
-int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
+int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int, int);
 
 /* must be called with ids->rw_mutex acquired for reading */
 int ipc_get_maxid(struct ipc_ids *);

^ permalink raw reply related

* [PATCH v5 00/10] IPC: checkpoint/restore in userspace enhancements
From: Stanislav Kinsbursky @ 2012-09-19 16:05 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel

v5:
1) Several define-dependent compile bugs fixed
2) IPC message copy test updated
3) A couple of minor fixes.
4) Qlogic driver update: rename of its internal SEM_SET define into SEM_INIT
(compile error).

v4:
1) If MSG_COPY flag is specified, then "mtype" is not a type, but message
number to copy.
2) MSG_SET_COPY logic for sys_msgctl() was removed.

v3:
1) Copying messages to user-space under a spinlock was replaced by allocation
of a dummy message before taking the queue lock and then copying the desired
message to the dummy one instead of unlinking it from the queue list.
I.e. the message queue copy logic was changed: messages can be retrieved one by
one (instead of receiving the whole list at once).

This patch set is aimed to provide additional functionality for all IPC objects,
which is required for migration of these objects by user-space
checkpoint/restore utils (CRIU).

The main problem here was impossibility to set up object id. This patch set
solves the problem in two steps:
1) Makes it possible to create new object (shared memory, semaphores set or
messages queue) with ID, equal to passed key.
2) Makes it possible to change existent object key.

Another problem was to peek messages from queues without deleting them.
This was achieved by introducing a new MSG_COPY flag for sys_msgrcv(). If the
MSG_COPY flag is set, then msgtyp is interpreted as a message number.

The following series implements...

---

Stanislav Kinsbursky (10):
      ipc: remove forced assignment of selected message
      ipc: "use key as id" functionality for resource get system call introduced
      ipc: segment key change helper introduced
      ipc: add new SHM_SET command for sys_shmctl() call
      ipc: add new MSG_SET command for sys_msgctl() call
      glge driver: rename internal SEM_SET macro to SEM_INIT
      ipc: add new SEM_SET command for sys_semctl() call
      IPC: message queue receive cleanup
      IPC: message queue copy feature introduced
      test: IPC message queue copy feture test


 drivers/net/ethernet/qlogic/qlge/qlge.h      |    4 
 drivers/net/ethernet/qlogic/qlge/qlge_main.c |   16 +-
 include/linux/ipc.h                          |    1 
 include/linux/msg.h                          |    7 +
 include/linux/sem.h                          |    1 
 include/linux/shm.h                          |    1 
 ipc/compat.c                                 |   45 +++--
 ipc/msg.c                                    |  112 +++++++++---
 ipc/msgutil.c                                |   38 ++++
 ipc/sem.c                                    |   14 +
 ipc/shm.c                                    |   17 +-
 ipc/util.c                                   |   69 +++++++
 ipc/util.h                                   |    6 +
 security/selinux/hooks.c                     |    3 
 security/smack/smack_lsm.c                   |    3 
 tools/testing/selftests/ipc/Makefile         |   28 +++
 tools/testing/selftests/ipc/msgque.c         |  251 ++++++++++++++++++++++++++
 17 files changed, 547 insertions(+), 69 deletions(-)
 create mode 100644 tools/testing/selftests/ipc/Makefile
 create mode 100644 tools/testing/selftests/ipc/msgque.c

-- 
Signature

^ permalink raw reply

* [PATCH v5 10/10] test: IPC message queue copy feture test
From: Stanislav Kinsbursky @ 2012-09-19 16:06 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

This test can be used to check whether the kernel supports the IPC message
queue copy and restore features (required by the CRIU project).
---
 tools/testing/selftests/ipc/Makefile |   28 ++++
 tools/testing/selftests/ipc/msgque.c |  251 ++++++++++++++++++++++++++++++++++
 2 files changed, 279 insertions(+), 0 deletions(-)
 create mode 100644 tools/testing/selftests/ipc/Makefile
 create mode 100644 tools/testing/selftests/ipc/msgque.c

diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
new file mode 100644
index 0000000..6c547bf
--- /dev/null
+++ b/tools/testing/selftests/ipc/Makefile
@@ -0,0 +1,28 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := X86
+	CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := X86
+	CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../arch/x86/include/generated/
+CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../arch/x86/include/
+
+all:
+ifeq ($(ARCH),X86)
+	gcc $(CFLAGS) msgque.c -o msgque_test
+else
+	echo "Not an x86 target, can't build msgque selftest"
+endif
+
+run_tests: all
+	./msgque_test
+
+clean:
+	rm -fr ./msgque_test
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
new file mode 100644
index 0000000..ffcc8b7
--- /dev/null
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -0,0 +1,251 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define MAX_MSG_SIZE		32
+
+struct msg1 {
+	int msize;
+	long mtype;
+	char mtext[MAX_MSG_SIZE];
+};
+
+#define TEST_STRING "Test sysv5 msg"
+#define MSG_TYPE 1
+
+#define ANOTHER_TEST_STRING "Yet another test sysv5 msg"
+#define ANOTHER_MSG_TYPE 26538
+
+#ifndef IPC_PRESET
+#define IPC_PRESET		00040000
+#endif
+
+#ifndef MSG_COPY
+#define MSG_COPY		040000
+#endif
+
+#ifndef MSG_SET
+#define MSG_SET			13
+#endif
+
+#if defined (__GLIBC__) && __GLIBC__ >= 2
+#define KEY __key
+#else
+#define KEY key
+#endif
+
+struct msgque_data {
+	int msq_id;
+	int qbytes;
+	int kern_id;
+	int qnum;
+	int mode;
+	struct msg1 *messages;
+};
+
+int restore_queue(struct msgque_data *msgque)
+{
+	struct msqid_ds ds;
+	int id, i;
+
+	id = msgget(msgque->msq_id,
+		     msgque->mode | IPC_CREAT | IPC_EXCL | IPC_PRESET);
+	if (id == -1) {
+		printf("Failed to create queue\n");
+		return -errno;
+	}
+
+	if (id != msgque->msq_id) {
+		printf("Failed to preset id (%d instead of %d)\n",
+							id, msgque->msq_id);
+		return -EFAULT;
+	}
+
+	if (msgctl(id, MSG_STAT, &ds) < 0) {
+		printf("Failed to stat queue\n");
+		return -errno;
+	}
+
+	ds.msg_perm.KEY = msgque->msq_id;
+	ds.msg_qbytes = msgque->qbytes;
+	if (msgctl(id, MSG_SET, &ds) < 0) {
+		printf("Failed to update message key\n");
+		return -errno;
+	}
+
+	for (i = 0; i < msgque->qnum; i++) {
+		if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype, msgque->messages[i].msize, IPC_NOWAIT) != 0) {
+			printf("msgsnd failed (%m)\n");
+			return -errno;
+		};
+	}
+	return 0;
+}
+
+int check_and_destroy_queue(struct msgque_data *msgque)
+{
+	struct msg1 message;
+	int cnt = 0, ret;
+
+	while (1) {
+		ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE, 0, IPC_NOWAIT);
+		if (ret < 0) {
+			if (errno == ENOMSG)
+				break;
+			printf("Failed to read IPC message: %m\n");
+			ret = -errno;
+			goto err;
+		}
+		if (ret != msgque->messages[cnt].msize) {
+			printf("Wrong message size: %d (expected %d)\n", ret, msgque->messages[cnt].msize);
+			ret = -EINVAL;
+			goto err;
+		}
+		if (message.mtype != msgque->messages[cnt].mtype) {
+			printf("Wrong message type\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
+			printf("Wrong message content\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		cnt++;
+	}
+
+	if (cnt != msgque->qnum) {
+		printf("Wrong message number\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = 0;
+err:
+	if (msgctl(msgque->msq_id, IPC_RMID, 0)) {
+		printf("Failed to destroy queue: %d\n", -errno);
+		return -errno;
+	}
+	return ret;
+}
+
+int dump_queue(struct msgque_data *msgque)
+{
+	struct msqid_ds ds;
+	int i, ret;
+
+	for (msgque->kern_id = 0; msgque->kern_id < 256; msgque->kern_id++) {
+		ret = msgctl(msgque->kern_id, MSG_STAT, &ds);
+		if (ret < 0) {
+			if (errno == -EINVAL)
+				continue;
+			printf("Failed to get stats for IPC queue with id %d\n", msgque->kern_id);
+			return -errno;
+		}
+
+		if (ret == msgque->msq_id)
+			break;
+	}
+
+	msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
+	if (msgque->messages == NULL) {
+		printf("Failed to get stats for IPC queue\n");
+		return -ENOMEM;
+	}
+
+	msgque->qnum = ds.msg_qnum;
+	msgque->mode = ds.msg_perm.mode;
+	msgque->qbytes = ds.msg_qbytes;
+
+	for (i = 0; i < msgque->qnum; i++) {
+		ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype, MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
+		if (ret < 0) {
+			printf("Failed to copy IPC message: %m (%d)\n", errno);
+			return -errno;
+		}
+		msgque->messages[i].msize = ret;
+	}
+	return 0;
+}
+
+int fill_msgque(struct msgque_data *msgque)
+{
+	struct msg1 msgbuf;
+
+	msgbuf.mtype = MSG_TYPE;
+	memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
+	if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING), IPC_NOWAIT) != 0) {
+		printf("First message send failed (%m)\n");
+		return -errno;
+	};
+
+	msgbuf.mtype = ANOTHER_MSG_TYPE;
+	memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
+	if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING), IPC_NOWAIT) != 0) {
+		printf("Second message send failed (%m)\n");
+		return -errno;
+	};
+	return 0;
+}
+
+int main (int argc, char **argv)
+{
+	key_t key;
+	int msg, pid, err;
+	struct msgque_data msgque;
+
+	key = ftok(argv[0], 822155650);
+	if (key == -1) {
+		printf("Can't make key\n");
+		return -errno;
+	}
+
+	msgque.msq_id = msgget(key, IPC_CREAT | IPC_EXCL | 0666);
+	if (msgque.msq_id == -1) {
+		printf("Can't create queue\n");
+		goto err_out;
+	}
+
+	err = fill_msgque(&msgque);
+	if (err) {
+		printf("Failed to fill queue\n");
+		goto err_destroy;
+	}
+
+	err = dump_queue(&msgque);
+	if (err) {
+		printf("Failed to dump queue\n");
+		goto err_destroy;
+	}
+
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to check and destroy queue\n");
+		goto err_out;
+	}
+
+	err = restore_queue(&msgque);
+	if (err) {
+		printf("Failed to restore queue\n");
+		goto err_destroy;
+	}
+
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to test queue\n");
+		goto err_out;
+	}
+	return 0;
+
+err_destroy:
+	if (msgctl(msgque.msq_id, IPC_RMID, 0)) {
+		printf("Failed to destroy queue: %d\n", -errno);
+		return -errno;
+	}
+err_out:
+	return err;
+}

^ permalink raw reply related

* [PATCH v5 09/10] IPC: message queue copy feature introduced
From: Stanislav Kinsbursky @ 2012-09-19 16:06 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

This patch is required for checkpoint/restore in userspace.
IOW, c/r requires some way to get all pending IPC messages without deleting
them from the queue (a checkpoint can fail, in which case the tasks will be
resumed, so the queue has to remain valid).
To achieve this, a new operation flag MSG_COPY for the sys_msgrcv() system call
is introduced. If this flag is specified, then mtype is interpreted as the
number of the message to copy.
If MSG_COPY is set, the kernel allocates a dummy message of the passed size
and then uses the new copy_msg() helper function to copy the desired message
(instead of unlinking it from the queue).

Notes:
1) Return -ENOSYS if MSG_COPY is specified, but CONFIG_CHECKPOINT_RESTORE is
not set.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
 include/linux/msg.h |    1 +
 ipc/msg.c           |   50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 ipc/msgutil.c       |   38 ++++++++++++++++++++++++++++++++++++++
 ipc/util.h          |    1 +
 4 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/include/linux/msg.h b/include/linux/msg.h
index 9411b76..4ca337f 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -11,6 +11,7 @@
 /* msgrcv options */
 #define MSG_NOERROR     010000  /* no error if message is too big */
 #define MSG_EXCEPT      020000  /* recv any msg except of specified type.*/
+#define MSG_COPY        040000  /* copy (not remove) all queue messages */
 
 /* Obsolete, used only for backwards compatibility and libc5 compiles */
 struct msqid_ds {
diff --git a/ipc/msg.c b/ipc/msg.c
index d8168a7..0984f07 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -785,19 +785,48 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
 	struct msg_msg *msg;
 	int mode;
 	struct ipc_namespace *ns;
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	struct msg_msg *copy = NULL;
+	unsigned long copy_number = 0;
+#endif
 
 	if (msqid < 0 || (long) bufsz < 0)
 		return -EINVAL;
+	if (msgflg & MSG_COPY) {
+#ifdef CONFIG_CHECKPOINT_RESTORE
+
+		if (msgflg & MSG_COPY) {
+			copy_number = msgtyp;
+			msgtyp = 0;
+		}
+
+		/*
+		 * Create dummy message to copy real message to.
+		 */
+		copy = load_msg(buf, bufsz);
+		if (IS_ERR(copy))
+			return PTR_ERR(copy);
+		copy->m_ts = bufsz;
+#else
+		return -ENOSYS;
+#endif
+	}
 	mode = convert_mode(&msgtyp, msgflg);
 	ns = current->nsproxy->ipc_ns;
 
 	msq = msg_lock_check(ns, msqid);
-	if (IS_ERR(msq))
+	if (IS_ERR(msq)) {
+#ifdef CONFIG_CHECKPOINT_RESTORE
+		if (msgflg & MSG_COPY)
+			free_msg(copy);
+#endif
 		return PTR_ERR(msq);
+	}
 
 	for (;;) {
 		struct msg_receiver msr_d;
 		struct list_head *tmp;
+		long msg_counter = 0;
 
 		msg = ERR_PTR(-EACCES);
 		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
@@ -817,10 +846,18 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
 						walk_msg->m_type != 1) {
 					msg = walk_msg;
 					msgtyp = walk_msg->m_type - 1;
+#ifdef CONFIG_CHECKPOINT_RESTORE
+				} else if (msgflg & MSG_COPY) {
+					if (copy_number == msg_counter) {
+						msg = copy_msg(walk_msg, copy);
+						break;
+					}
+#endif
 				} else {
 					msg = walk_msg;
 					break;
 				}
+				msg_counter++;
 			}
 			tmp = tmp->next;
 		}
@@ -833,6 +870,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
 				msg = ERR_PTR(-E2BIG);
 				goto out_unlock;
 			}
+#ifdef CONFIG_CHECKPOINT_RESTORE
+			if (msgflg & MSG_COPY)
+				goto out_unlock;
+#endif
 			list_del(&msg->m_list);
 			msq->q_qnum--;
 			msq->q_rtime = get_seconds();
@@ -916,8 +957,13 @@ out_unlock:
 			break;
 		}
 	}
-	if (IS_ERR(msg))
+	if (IS_ERR(msg)) {
+#ifdef CONFIG_CHECKPOINT_RESTORE
+		if (msgflg & MSG_COPY)
+			free_msg(copy);
+#endif
 		return PTR_ERR(msg);
+	}
 
 	bufsz = msg_handler(buf, msg, bufsz);
 	free_msg(msg);
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 26143d3..b281f5c 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -100,7 +100,45 @@ out_err:
 	free_msg(msg);
 	return ERR_PTR(err);
 }
+#ifdef CONFIG_CHECKPOINT_RESTORE
+struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
+{
+	struct msg_msgseg *dst_pseg, *src_pseg;
+	int len = src->m_ts;
+	int alen;
+
+	BUG_ON(dst == NULL);
+	if (src->m_ts > dst->m_ts)
+		return ERR_PTR(-EINVAL);
+
+	alen = len;
+	if (alen > DATALEN_MSG)
+		alen = DATALEN_MSG;
+
+	dst->next = NULL;
+	dst->security = NULL;
 
+	memcpy(dst + 1, src + 1, alen);
+
+	len -= alen;
+	dst_pseg = dst->next;
+	src_pseg = src->next;
+	while (len > 0) {
+		alen = len;
+		if (alen > DATALEN_SEG)
+			alen = DATALEN_SEG;
+		memcpy(dst_pseg + 1, src_pseg + 1, alen);
+		dst_pseg = dst_pseg->next;
+		len -= alen;
+		src_pseg = src_pseg->next;
+	}
+
+	dst->m_type = src->m_type;
+	dst->m_ts = src->m_ts;
+
+	return dst;
+}
+#endif
 int store_msg(void __user *dest, struct msg_msg *msg, int len)
 {
 	int alen;
diff --git a/ipc/util.h b/ipc/util.h
index b48016d..953339f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -142,6 +142,7 @@ int ipc_parse_version (int *cmd);
 
 extern void free_msg(struct msg_msg *msg);
 extern struct msg_msg *load_msg(const void __user *src, int len);
+extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
 extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
 
 extern void recompute_msgmni(struct ipc_namespace *);


^ permalink raw reply related

* [PATCH v5 08/10] IPC: message queue receive cleanup
From: Stanislav Kinsbursky @ 2012-09-19 16:06 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

This patch moves all message related manipulation into one function msg_fill().
Actually, two functions because of the compat one.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
 include/linux/msg.h |    5 +++--
 ipc/compat.c        |   36 +++++++++++++++++++-----------------
 ipc/msg.c           |   44 +++++++++++++++++++++++---------------------
 3 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/include/linux/msg.h b/include/linux/msg.h
index 6689e73..9411b76 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -105,8 +105,9 @@ struct msg_queue {
 /* Helper routines for sys_msgsnd and sys_msgrcv */
 extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			size_t msgsz, int msgflg);
-extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
-			size_t msgsz, long msgtyp, int msgflg);
+extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
+		      int msgflg,
+		      long (*msg_fill)(void __user *, struct msg_msg *, size_t ));
 
 #endif /* __KERNEL__ */
 
diff --git a/ipc/compat.c b/ipc/compat.c
index 84d8efd..b879d50 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -341,13 +341,23 @@ long compat_sys_msgsnd(int first, int second, int third, void __user *uptr)
 	return do_msgsnd(first, type, up->mtext, second, third);
 }
 
+long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
+{
+	struct compat_msgbuf __user *msgp = dest;
+	size_t msgsz;
+
+	if (put_user(msg->m_type, &msgp->mtype))
+		return -EFAULT;
+
+	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
+	if (store_msg(msgp->mtext, msg, msgsz))
+		return -EFAULT;
+	return msgsz;
+}
+
 long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 			   int version, void __user *uptr)
 {
-	struct compat_msgbuf __user *up;
-	long type;
-	int err;
-
 	if (first < 0)
 		return -EINVAL;
 	if (second < 0)
@@ -355,23 +365,14 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 
 	if (!version) {
 		struct compat_ipc_kludge ipck;
-		err = -EINVAL;
 		if (!uptr)
-			goto out;
-		err = -EFAULT;
+			return -EINVAL;
 		if (copy_from_user (&ipck, uptr, sizeof(ipck)))
-			goto out;
+			return -EFAULT;
 		uptr = compat_ptr(ipck.msgp);
 		msgtyp = ipck.msgtyp;
 	}
-	up = uptr;
-	err = do_msgrcv(first, &type, up->mtext, second, msgtyp, third);
-	if (err < 0)
-		goto out;
-	if (put_user(type, &up->mtype))
-		err = -EFAULT;
-out:
-	return err;
+	return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill);
 }
 #else
 long compat_sys_semctl(int semid, int semnum, int cmd, int arg)
@@ -394,7 +395,8 @@ long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp,
 {
 	long err, mtype;
 
-	err =  do_msgrcv(msqid, &mtype, msgp->mtext, (ssize_t)msgsz, msgtyp, msgflg);
+	err =  do_msgrcv(msqid, &mtype, msgp->mtext, (ssize_t)msgsz, msgtyp,
+			 msgflg, compat_do_msg_fill);
 	if (err < 0)
 		goto out;
 
diff --git a/ipc/msg.c b/ipc/msg.c
index ef4f118..d8168a7 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -763,15 +763,30 @@ static inline int convert_mode(long *msgtyp, int msgflg)
 	return SEARCH_EQUAL;
 }
 
-long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
-		size_t msgsz, long msgtyp, int msgflg)
+static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
+{
+	struct msgbuf __user *msgp = dest;
+	size_t msgsz;
+
+	if (put_user(msg->m_type, &msgp->mtype))
+		return -EFAULT;
+
+	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
+	if (store_msg(msgp->mtext, msg, msgsz))
+		return -EFAULT;
+	return msgsz;
+}
+
+long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
+	       int msgflg,
+	       long (*msg_handler)(void __user *, struct msg_msg *, size_t ))
 {
 	struct msg_queue *msq;
 	struct msg_msg *msg;
 	int mode;
 	struct ipc_namespace *ns;
 
-	if (msqid < 0 || (long) msgsz < 0)
+	if (msqid < 0 || (long) bufsz < 0)
 		return -EINVAL;
 	mode = convert_mode(&msgtyp, msgflg);
 	ns = current->nsproxy->ipc_ns;
@@ -814,7 +829,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 			 * Found a suitable message.
 			 * Unlink it from the queue.
 			 */
-			if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
+			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
 				msg = ERR_PTR(-E2BIG);
 				goto out_unlock;
 			}
@@ -841,7 +856,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 		if (msgflg & MSG_NOERROR)
 			msr_d.r_maxsize = INT_MAX;
 		else
-			msr_d.r_maxsize = msgsz;
+			msr_d.r_maxsize = bufsz;
 		msr_d.r_msg = ERR_PTR(-EAGAIN);
 		current->state = TASK_INTERRUPTIBLE;
 		msg_unlock(msq);
@@ -904,29 +919,16 @@ out_unlock:
 	if (IS_ERR(msg))
 		return PTR_ERR(msg);
 
-	msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
-	*pmtype = msg->m_type;
-	if (store_msg(mtext, msg, msgsz))
-		msgsz = -EFAULT;
-
+	bufsz = msg_handler(buf, msg, bufsz);
 	free_msg(msg);
 
-	return msgsz;
+	return bufsz;
 }
 
 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
 		long, msgtyp, int, msgflg)
 {
-	long err, mtype;
-
-	err =  do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
-	if (err < 0)
-		goto out;
-
-	if (put_user(mtype, &msgp->mtype))
-		err = -EFAULT;
-out:
-	return err;
+	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
 }
 
 #ifdef CONFIG_PROC_FS


^ permalink raw reply related

* [PATCH v5 07/10] ipc: add new SEM_SET command for sys_semctl() call
From: Stanislav Kinsbursky @ 2012-09-19 16:06 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

The new SEM_SET command is interpreted exactly like IPC_SET, but it also
updates the key, cuid and cgid values. IOW, it allows changing an existing key
value. Before the update, the new key is checked to be unused; otherwise
-EEXIST is returned.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
 include/linux/sem.h        |    1 +
 ipc/compat.c               |    1 +
 ipc/sem.c                  |   10 ++++++++--
 security/selinux/hooks.c   |    1 +
 security/smack/smack_lsm.c |    1 +
 5 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 10d6b22..c74b9b5 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -18,6 +18,7 @@
 /* ipcs ctl cmds */
 #define SEM_STAT 18
 #define SEM_INFO 19
+#define SEM_SET  20
 
 /* Obsolete, used only for backwards compatibility and libc5 compiles */
 struct semid_ds {
diff --git a/ipc/compat.c b/ipc/compat.c
index 9c70f9a..84d8efd 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -290,6 +290,7 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
 		break;
 
 	case IPC_SET:
+	case SEM_SET:
 		if (version == IPC_64) {
 			err = get_compat_semid64_ds(&s64, compat_ptr(pad));
 		} else {
diff --git a/ipc/sem.c b/ipc/sem.c
index e89b90c..b4f80082 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1085,12 +1085,13 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
 	struct semid64_ds semid64;
 	struct kern_ipc_perm *ipcp;
 
-	if(cmd == IPC_SET) {
+	if (cmd == IPC_SET || cmd == SEM_SET) {
 		if (copy_semid_from_user(&semid64, arg.buf, version))
 			return -EFAULT;
 	}
 
-	ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd,
+	ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid,
+			       (cmd != SEM_SET) ? cmd : IPC_SET,
 			       &semid64.sem_perm, 0);
 	if (IS_ERR(ipcp))
 		return PTR_ERR(ipcp);
@@ -1105,6 +1106,10 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
 	case IPC_RMID:
 		freeary(ns, ipcp);
 		goto out_up;
+	case SEM_SET:
+		err = ipc_update_key(&sem_ids(ns), &semid64.sem_perm, ipcp);
+		if (err)
+			break;
 	case IPC_SET:
 		ipc_update_perm(&semid64.sem_perm, ipcp);
 		sma->sem_ctime = get_seconds();
@@ -1150,6 +1155,7 @@ SYSCALL_DEFINE(semctl)(int semid, int semnum, int cmd, union semun arg)
 		return err;
 	case IPC_RMID:
 	case IPC_SET:
+	case SEM_SET:
 		err = semctl_down(ns, semid, cmd, version, arg);
 		return err;
 	default:
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8269286..a23a7d6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5164,6 +5164,7 @@ static int selinux_sem_semctl(struct sem_array *sma, int cmd)
 		perms = SEM__DESTROY;
 		break;
 	case IPC_SET:
+	case SEM_SET:
 		perms = SEM__SETATTR;
 		break;
 	case IPC_STAT:
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 193e147..fea40cf 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2274,6 +2274,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd)
 	case SETALL:
 	case IPC_RMID:
 	case IPC_SET:
+	case SEM_SET:
 		may = MAY_READWRITE;
 		break;
 	case IPC_INFO:

^ permalink raw reply related

* [PATCH v5 06/10] glge driver: rename internal SEM_SET macro to SEM_INIT
From: Stanislav Kinsbursky @ 2012-09-19 16:05 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

The reason for this patch is that SEM_SET is going to become part of the new
API of the IPC sys_semctl() system call.
The name itself is quite natural for IPC, because all Linux-specific command
names for IPC system calls were originally created by replacing the "IPC_"
prefix with "SEM_" ("MSG_", "SHM_").
So, I'm hoping that this change doesn't really matter to the "QLogic qlge NIC
HBA Driver" developers, since it's just an internal define.
---
 drivers/net/ethernet/qlogic/qlge/qlge.h      |    4 ++--
 drivers/net/ethernet/qlogic/qlge/qlge_main.c |   16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index a131d7b..6f46ea5 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -347,10 +347,10 @@ enum {
 enum {
 	/*
 	 * Example:
-	 * reg = SEM_XGMAC0_MASK | (SEM_SET << SEM_XGMAC0_SHIFT)
+	 * reg = SEM_XGMAC0_MASK | (SEM_INIT << SEM_XGMAC0_SHIFT)
 	 */
 	SEM_CLEAR = 0,
-	SEM_SET = 1,
+	SEM_INIT = 1,
 	SEM_FORCE = 3,
 	SEM_XGMAC0_SHIFT = 0,
 	SEM_XGMAC1_SHIFT = 2,
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index b53a3b6..18e0a0c 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -109,28 +109,28 @@ static int ql_sem_trylock(struct ql_adapter *qdev, u32 sem_mask)
 
 	switch (sem_mask) {
 	case SEM_XGMAC0_MASK:
-		sem_bits = SEM_SET << SEM_XGMAC0_SHIFT;
+		sem_bits = SEM_INIT << SEM_XGMAC0_SHIFT;
 		break;
 	case SEM_XGMAC1_MASK:
-		sem_bits = SEM_SET << SEM_XGMAC1_SHIFT;
+		sem_bits = SEM_INIT << SEM_XGMAC1_SHIFT;
 		break;
 	case SEM_ICB_MASK:
-		sem_bits = SEM_SET << SEM_ICB_SHIFT;
+		sem_bits = SEM_INIT << SEM_ICB_SHIFT;
 		break;
 	case SEM_MAC_ADDR_MASK:
-		sem_bits = SEM_SET << SEM_MAC_ADDR_SHIFT;
+		sem_bits = SEM_INIT << SEM_MAC_ADDR_SHIFT;
 		break;
 	case SEM_FLASH_MASK:
-		sem_bits = SEM_SET << SEM_FLASH_SHIFT;
+		sem_bits = SEM_INIT << SEM_FLASH_SHIFT;
 		break;
 	case SEM_PROBE_MASK:
-		sem_bits = SEM_SET << SEM_PROBE_SHIFT;
+		sem_bits = SEM_INIT << SEM_PROBE_SHIFT;
 		break;
 	case SEM_RT_IDX_MASK:
-		sem_bits = SEM_SET << SEM_RT_IDX_SHIFT;
+		sem_bits = SEM_INIT << SEM_RT_IDX_SHIFT;
 		break;
 	case SEM_PROC_REG_MASK:
-		sem_bits = SEM_SET << SEM_PROC_REG_SHIFT;
+		sem_bits = SEM_INIT << SEM_PROC_REG_SHIFT;
 		break;
 	default:
 		netif_alert(qdev, probe, qdev->ndev, "bad Semaphore mask!.\n");

^ permalink raw reply related

* [PATCH v5 05/10] ipc: add new MSG_SET command for sys_msgctl() call
From: Stanislav Kinsbursky @ 2012-09-19 16:05 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

The new MSG_SET command is interpreted exactly like IPC_SET, but it also
updates the key, cuid and cgid values. IOW, it allows changing an existing key
value. Before the update, the new key is checked to be unused; otherwise
-EEXIST is returned.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
 include/linux/msg.h        |    1 +
 ipc/compat.c               |    1 +
 ipc/msg.c                  |   13 +++++++++++--
 security/selinux/hooks.c   |    1 +
 security/smack/smack_lsm.c |    1 +
 5 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/include/linux/msg.h b/include/linux/msg.h
index 56abf15..6689e73 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -6,6 +6,7 @@
 /* ipcs ctl commands */
 #define MSG_STAT 11
 #define MSG_INFO 12
+#define MSG_SET  13
 
 /* msgrcv options */
 #define MSG_NOERROR     010000  /* no error if message is too big */
diff --git a/ipc/compat.c b/ipc/compat.c
index 35c750d..9c70f9a 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -483,6 +483,7 @@ long compat_sys_msgctl(int first, int second, void __user *uptr)
 		break;
 
 	case IPC_SET:
+	case MSG_SET:
 		if (version == IPC_64) {
 			err = get_compat_msqid64(&m64, uptr);
 		} else {
diff --git a/ipc/msg.c b/ipc/msg.c
index 1cecaf2..ef4f118 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -392,6 +392,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 		out->msg_perm.uid      	= tbuf_old.msg_perm.uid;
 		out->msg_perm.gid      	= tbuf_old.msg_perm.gid;
 		out->msg_perm.mode     	= tbuf_old.msg_perm.mode;
+		out->msg_perm.cuid	= tbuf_old.msg_perm.cuid;
+		out->msg_perm.cgid	= tbuf_old.msg_perm.cgid;
+		out->msg_perm.key	= tbuf_old.msg_perm.key;
 
 		if (tbuf_old.msg_qbytes == 0)
 			out->msg_qbytes	= tbuf_old.msg_lqbytes;
@@ -418,12 +421,13 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 	struct msg_queue *msq;
 	int err;
 
-	if (cmd == IPC_SET) {
+	if (cmd == IPC_SET || cmd == MSG_SET) {
 		if (copy_msqid_from_user(&msqid64, buf, version))
 			return -EFAULT;
 	}
 
-	ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd,
+	ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid,
+			       (cmd != MSG_SET) ? cmd : IPC_SET,
 			       &msqid64.msg_perm, msqid64.msg_qbytes);
 	if (IS_ERR(ipcp))
 		return PTR_ERR(ipcp);
@@ -439,6 +443,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 		freeque(ns, ipcp);
 		goto out_up;
 	case IPC_SET:
+	case MSG_SET:
 		if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
 		    !capable(CAP_SYS_RESOURCE)) {
 			err = -EPERM;
@@ -447,6 +452,9 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 
 		msq->q_qbytes = msqid64.msg_qbytes;
 
+		if (cmd == MSG_SET)
+			ipc_update_key(&msg_ids(ns), &msqid64.msg_perm, ipcp);
+
 		ipc_update_perm(&msqid64.msg_perm, ipcp);
 		msq->q_ctime = get_seconds();
 		/* sleeping receivers might be excluded by
@@ -566,6 +574,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
 	}
 	case IPC_SET:
 	case IPC_RMID:
+	case MSG_SET:
 		err = msgctl_down(ns, msqid, cmd, buf, version);
 		return err;
 	default:
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 928ffc2..8269286 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4916,6 +4916,7 @@ static int selinux_msg_queue_msgctl(struct msg_queue *msq, int cmd)
 		perms = MSGQ__GETATTR | MSGQ__ASSOCIATE;
 		break;
 	case IPC_SET:
+	case MSG_SET:
 		perms = MSGQ__SETATTR;
 		break;
 	case IPC_RMID:
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 0f2c481..193e147 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2395,6 +2395,7 @@ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd)
 		may = MAY_READ;
 		break;
 	case IPC_SET:
+	case MSG_SET:
 	case IPC_RMID:
 		may = MAY_READWRITE;
 		break;

^ permalink raw reply related

* [PATCH v5 04/10] ipc: add new SHM_SET command for sys_shmctl() call
From: Stanislav Kinsbursky @ 2012-09-19 16:05 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

The new SHM_SET command is interpreted exactly like IPC_SET, but it also
updates the key, cuid and cgid values. IOW, it allows changing an existing key
value. Before the update, the new key is checked to be unused; otherwise
-EEXIST is returned.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
 include/linux/shm.h        |    1 +
 ipc/compat.c               |    1 +
 ipc/shm.c                  |   13 +++++++++++--
 security/selinux/hooks.c   |    1 +
 security/smack/smack_lsm.c |    1 +
 5 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/include/linux/shm.h b/include/linux/shm.h
index edd0868..9a3e423 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -63,6 +63,7 @@ struct shmid_ds {
 /* ipcs ctl commands */
 #define SHM_STAT 	13
 #define SHM_INFO 	14
+#define SHM_SET		15
 
 /* Obsolete, used only for backwards compatibility */
 struct	shminfo {
diff --git a/ipc/compat.c b/ipc/compat.c
index af30d13..35c750d 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -692,6 +692,7 @@ long compat_sys_shmctl(int first, int second, void __user *uptr)
 
 
 	case IPC_SET:
+	case SHM_SET:
 		if (version == IPC_64) {
 			err = get_compat_shmid64_ds(&s64, uptr);
 		} else {
diff --git a/ipc/shm.c b/ipc/shm.c
index 0088418..65c0c5c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -636,6 +636,9 @@ copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
 		out->shm_perm.uid	= tbuf_old.shm_perm.uid;
 		out->shm_perm.gid	= tbuf_old.shm_perm.gid;
 		out->shm_perm.mode	= tbuf_old.shm_perm.mode;
+		out->shm_perm.cuid	= tbuf_old.shm_perm.cuid;
+		out->shm_perm.cgid	= tbuf_old.shm_perm.cgid;
+		out->shm_perm.key	= tbuf_old.shm_perm.key;
 
 		return 0;
 	    }
@@ -740,12 +743,13 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 	struct shmid_kernel *shp;
 	int err;
 
-	if (cmd == IPC_SET) {
+	if (cmd == IPC_SET || cmd == SHM_SET) {
 		if (copy_shmid_from_user(&shmid64, buf, version))
 			return -EFAULT;
 	}
 
-	ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
+	ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid,
+			       (cmd != SHM_SET) ? cmd : IPC_SET,
 			       &shmid64.shm_perm, 0);
 	if (IS_ERR(ipcp))
 		return PTR_ERR(ipcp);
@@ -759,6 +763,10 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 	case IPC_RMID:
 		do_shm_rmid(ns, ipcp);
 		goto out_up;
+	case SHM_SET:
+		err = ipc_update_key(&shm_ids(ns), &shmid64.shm_perm, ipcp);
+		if (err)
+			break;
 	case IPC_SET:
 		ipc_update_perm(&shmid64.shm_perm, ipcp);
 		shp->shm_ctim = get_seconds();
@@ -936,6 +944,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 	}
 	case IPC_RMID:
 	case IPC_SET:
+	case SHM_SET:
 		err = shmctl_down(ns, shmid, cmd, buf, version);
 		return err;
 	default:
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6c77f63..928ffc2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5058,6 +5058,7 @@ static int selinux_shm_shmctl(struct shmid_kernel *shp, int cmd)
 		perms = SHM__GETATTR | SHM__ASSOCIATE;
 		break;
 	case IPC_SET:
+	case SHM_SET:
 		perms = SHM__SETATTR;
 		break;
 	case SHM_LOCK:
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 8221514..0f2c481 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2142,6 +2142,7 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd)
 		may = MAY_READ;
 		break;
 	case IPC_SET:
+	case SHM_SET:
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	case IPC_RMID:

^ permalink raw reply related

* [PATCH v5 03/10] ipc: segment key change helper introduced
From: Stanislav Kinsbursky @ 2012-09-19 16:05 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

This patch introduces infrastructure for changing the key of an existing
segment. The new function ipc_update_key() can be used to change a segment's
key, cuid and cgid values. It checks that the new key is not already in use
(except for IPC_PRIVATE) before setting it on the existing segment.
To make this possible, copying of these fields from user-space was added to the
__get_compat_ipc_perm() and __get_compat_ipc64_perm() functions. Also, segment
search by key and locking were split into separate functions, because
ipc_update_key() doesn't need to lock the segment while checking that the new
key is not used.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
 ipc/compat.c |    6 ++++++
 ipc/util.c   |   51 ++++++++++++++++++++++++++++++++++++++++++++++++---
 ipc/util.h   |    2 ++
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/ipc/compat.c b/ipc/compat.c
index ad9518e..af30d13 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -144,6 +144,9 @@ static inline int __get_compat_ipc64_perm(struct ipc64_perm *p64,
 	err  = __get_user(p64->uid, &up64->uid);
 	err |= __get_user(p64->gid, &up64->gid);
 	err |= __get_user(p64->mode, &up64->mode);
+	err |= __get_user(p64->cuid, &up64->cuid);
+	err |= __get_user(p64->cgid, &up64->cgid);
+	err |= __get_user(p64->key, &up64->key);
 	return err;
 }
 
@@ -155,6 +158,9 @@ static inline int __get_compat_ipc_perm(struct ipc64_perm *p,
 	err  = __get_user(p->uid, &up->uid);
 	err |= __get_user(p->gid, &up->gid);
 	err |= __get_user(p->mode, &up->mode);
+	err |= __get_user(p->cuid, &up->cuid);
+	err |= __get_user(p->cgid, &up->cgid);
+	err |= __get_user(p->key, &up->key);
 	return err;
 }
 
diff --git a/ipc/util.c b/ipc/util.c
index 328abd1..1154245 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -173,7 +173,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
  *	@key: The key to find
  *	
  *	Requires ipc_ids.rw_mutex locked.
- *	Returns the LOCKED pointer to the ipc structure if found or NULL
+ *	Returns the UNLOCKED pointer to the ipc structure if found or NULL
  *	if not.
  *	If key is found ipc points to the owning ipc structure
  */
@@ -195,7 +195,6 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 			continue;
 		}
 
-		ipc_lock_by_ptr(ipc);
 		return ipc;
 	}
 
@@ -203,6 +202,27 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 }
 
 /**
+ *	ipc_findkey_locked	-	find and lock a key in an ipc identifier set
+ *	@ids: Identifier set
+ *	@key: The key to find
+ *
+ *	Requires ipc_ids.rw_mutex locked.
+ *	Returns the LOCKED pointer to the ipc structure if found or NULL
+ *	if not.
+ *	If key is found ipc points to the owning ipc structure
+ */
+
+static struct kern_ipc_perm *ipc_findkey_locked(struct ipc_ids *ids, key_t key)
+{
+	struct kern_ipc_perm *ipc;
+
+	ipc = ipc_findkey(ids, key);
+	if (ipc)
+		ipc_lock_by_ptr(ipc);
+	return ipc;
+}
+
+/**
  *	ipc_get_maxid 	-	get the last assigned id
  *	@ids: IPC identifier set
  *
@@ -388,7 +408,7 @@ retry:
 	 * a new entry + read locks are not "upgradable"
 	 */
 	down_write(&ids->rw_mutex);
-	ipcp = ipc_findkey(ids, params->key);
+	ipcp = ipc_findkey_locked(ids, params->key);
 	if (ipcp == NULL) {
 		/* key not used */
 		if (!(flg & IPC_CREAT))
@@ -755,6 +775,31 @@ int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
 }
 
 /**
+ * ipc_update_key - update the key of an IPC.
+ * @in:  the permission given as input.
+ * @out: the permission of the ipc to set.
+ *
+ * Common routine called by sys_shmctl(), sys_semctl(). sys_msgctl().
+ */
+int ipc_update_key(struct ipc_ids *ids, struct ipc64_perm *in,
+		    struct kern_ipc_perm *out)
+{
+
+	if (in->key && out->key != in->key) {
+		/*
+		 * Check for existent segment with the same key.
+		 * Note: ipc_ids.rw_mutex is taken for write already.
+		 */
+		if (ipc_findkey(ids, in->key))
+			return -EEXIST;
+	}
+	out->cuid = in->cuid;
+	out->cgid = in->cgid;
+	out->key = in->key;
+	return 0;
+}
+
+/**
  * ipc_update_perm - update the permissions of an IPC.
  * @in:  the permission given as input.
  * @out: the permission of the ipc to set.
diff --git a/ipc/util.h b/ipc/util.h
index 878df18..b48016d 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -126,6 +126,8 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 
 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
+int ipc_update_key(struct ipc_ids *ids, struct ipc64_perm *in,
+		   struct kern_ipc_perm *out);
 void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
 struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
 				      struct ipc_ids *ids, int id, int cmd,

^ permalink raw reply related

* [PATCH v5 01/10] ipc: remove forced assignment of selected message
From: Stanislav Kinsbursky @ 2012-09-19 16:05 UTC (permalink / raw)
  To: akpm
  Cc: manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160430.11254.86848.stgit@localhost6.localdomain6>

This is a cleanup patch. The assignment is redundant.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
 ipc/msg.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index 7385de2..f3bfbb8 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -787,7 +787,6 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 			    !security_msg_queue_msgrcv(msq, walk_msg, current,
 						       msgtyp, mode)) {
 
-				msg = walk_msg;
 				if (mode == SEARCH_LESSEQUAL &&
 						walk_msg->m_type != 1) {
 					msg = walk_msg;


^ permalink raw reply related

* Re: [PATCH net-next] net: more accurate network taps in transmit path
From: Jamie Gloudon @ 2012-09-19 15:58 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1348037089.26523.397.camel@edumazet-glaptop>

Just to report. This patch fixed the invalid tcp tx checksum issue via tap for me. Thanks!

On Wed, Sep 19, 2012 at 08:44:49AM +0200, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> dev_queue_xmit_nit() should be called right before ndo_start_xmit()
> calls or we might give wrong packet contents to taps users :
> 
> Packet checksum can be changed, or packet can be linearized or
> segmented, and segments partially sent for the later case.
> 
> Also a memory allocation can fail and packet never really hit the
> driver entry point.
> 
> Reported-by: Jamie Gloudon <jamie.gloudon@gmail.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/core/dev.c |    9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index dcc673d..52cd1d7 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2213,9 +2213,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
>  		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
>  			skb_dst_drop(skb);
>  
> -		if (!list_empty(&ptype_all))
> -			dev_queue_xmit_nit(skb, dev);
> -
>  		features = netif_skb_features(skb);
>  
>  		if (vlan_tx_tag_present(skb) &&
> @@ -2250,6 +2247,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
>  			}
>  		}
>  
> +		if (!list_empty(&ptype_all))
> +			dev_queue_xmit_nit(skb, dev);
> +
>  		skb_len = skb->len;
>  		rc = ops->ndo_start_xmit(skb, dev);
>  		trace_net_dev_xmit(skb, rc, dev, skb_len);
> @@ -2272,6 +2272,9 @@ gso:
>  		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
>  			skb_dst_drop(nskb);
>  
> +		if (!list_empty(&ptype_all))
> +			dev_queue_xmit_nit(nskb, dev);
> +
>  		skb_len = nskb->len;
>  		rc = ops->ndo_start_xmit(nskb, dev);
>  		trace_net_dev_xmit(nskb, rc, dev, skb_len);
> 
> 

^ permalink raw reply

* Re: New commands to configure IOV features
From: Greg Rose @ 2012-09-19 15:53 UTC (permalink / raw)
  To: Yuval Mintz
  Cc: davem@davemloft.net, netdev@vger.kernel.org, Ariel Elior,
	Eilon Greenstein
In-Reply-To: <5059A767.2090307@broadcom.com>

On Wed, 19 Sep 2012 14:07:19 +0300
Yuval Mintz <yuvalmin@broadcom.com> wrote:

> >>> Back to the original discussion though--has anyone got any ideas
> >>> about the best way to trigger runtime creation of VFs?  I don't
> >>> know what the binary APIs looks like, but via sysfs I could see
> >>> something like
> >>>
> >>> echo number_of_new_vfs_to_create >
> >>> /sys/bus/pci/devices/<address>/create_vfs
> >>>
> >>> Something else that occurred to me--is there buy-in from driver
> >>> maintainers?  I know the Intel ethernet drivers (what I'm most
> >>> familiar
> >>> with) would need to be substantially modified to support
> >>> on-the-fly addition of new vfs.  Currently they assume that the
> >>> number of vfs is known at module init time.
> >>
> >> Why couldn't rtnl_link_ops be used for this. It is already the
> >> preferred interface to create vlan's, bond devices, and other
> >> virtual devices? The one issue is that do the created VF's exist
> >> in kernel as devices or only visible to guest?
> > 
> > I would say that rtnl_link_ops are network oriented and not
> > appropriate for something like a storage controller or graphics
> > device, which are two other common SR-IOV capable devices.
> 
> Hi Dave,
> 
> We're currently fine-tuning our SRIOV support, which we will shortly
> send upstream.
> 
> We've encountered a problem though - all drivers currently supporting
> SRIOV do so with the usage of a module param: e.g., 'max_vfs' for
> ixgbe, 'num_vfs' for benet, etc.
> The SRIOV feature is disabled by default on all the drivers; it can
> only be enabled via usage of the module param.
> 
> We don't want the lack of SRIOV module param in the bnx2x driver to be
> the bottle-neck when we'll submit the SRIOV feature upstream, and we
> also don't want to enable SRIOV by default (following the same logic
> of other drivers; most users don't use SRIOV and it would strain their
> resources).
> 
> As we see it, there are several possible ways of solving the issue:
>  1. Use some network-tool (e.g., ethtool).
>  2. Implement a standard sysfs interface for PCIe devices, as SRIOV is
>     not solely network-related (this should be done via the PCI linux
>     tree).

I was not able to attend the Linux conference held at the end of August
myself but coworkers of mine here at Intel informed me that method 2 here
seems to be the preferred approach.  Perhaps some folks who attended
the conference can chime in with more specifics.

- Greg
LAN Access Division
Intel Corp.



>  3. Implement a module param in our bnx2x code.
> 
> We would like to know what's your preferred method for solving this
> issue, and to hear if you have another (better?) method by which we
> can add this kind of support.
> 
> Thanks,
> Yuval Mintz
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: Possible networking regression in 3.6.0
From: Chris Clayton @ 2012-09-19 15:26 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1347979239.26523.267.camel@edumazet-glaptop>

>
> It would help to have some traffic sample, maybe.
>
> Especially if the problem is not easily reproductible for us.
>

OK, I've used netsniff-ng to capture the traffic on all interfaces on 
the host (that would be tap0 and eth0, I guess) whilst attempting to 
ping the router from the WinXP KVM client. The result is a pcap file 
that I processed with tcpdump to produce:

reading from file net-trace.pcap, link-type EN10MB (Ethernet)
14:56:31.406336 ARP, Request who-has 192.168.200.254 tell 192.168.200.1, 
length 28
         0x0000:  0001 0800 0604 0001 5254 0c3b 1728 c0a8
         0x0010:  c801 0000 0000 0000 c0a8 c8fe
14:56:31.406357 ARP, Reply 192.168.200.254 is-at 46:83:93:8f:f0:7e, 
length 28
         0x0000:  0001 0800 0604 0002 4683 938f f07e c0a8
         0x0010:  c8fe 5254 0c3b 1728 c0a8 c801
14:56:31.406534 IP 192.168.200.1 > 192.168.0.1: ICMP echo request, id 
512, seq 4352, length 40
         0x0000:  4500 003c 0195 0000 8001 efd8 c0a8 c801
         0x0010:  c0a8 0001 0800 3a5c 0200 1100 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:31.406566 ARP, Request who-has 192.168.0.1 tell 192.168.0.40, 
length 28
         0x0000:  0001 0800 0604 0001 5c9a d85c 6331 c0a8
         0x0010:  0028 0000 0000 0000 c0a8 0001
14:56:31.410830 ARP, Reply 192.168.0.1 is-at 00:1f:33:80:09:44, length 46
         0x0000:  0001 0800 0604 0002 001f 3380 0944 c0a8
         0x0010:  0001 5c9a d85c 6331 c0a8 0028 c0a8 0001
         0x0020:  e000 0001 1164 ee9b 0000 0000 4500
14:56:31.410851 IP 192.168.0.40 > 192.168.0.1: ICMP echo request, id 
512, seq 4352, length 40
         0x0000:  4500 003c 0195 0000 7f01 b8b2 c0a8 0028
         0x0010:  c0a8 0001 0800 3a5c 0200 1100 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:31.414474 IP 192.168.0.1 > 192.168.0.40: ICMP echo reply, id 512, 
seq 4352, length 40
         0x0000:  4500 003c cf4f 0000 ff01 6af7 c0a8 0001
         0x0010:  c0a8 0028 0000 425c 0200 1100 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:36.404781 ARP, Request who-has 192.168.0.40 tell 192.168.0.1, 
length 46
         0x0000:  0001 0800 0604 0001 001f 3380 0944 c0a8
         0x0010:  0001 0000 0000 0000 c0a8 0028 c0a8 0001
         0x0020:  c0a8 0028 0000 425c 0200 1100 6162
14:56:36.404806 ARP, Reply 192.168.0.40 is-at 5c:9a:d8:5c:63:31, length 28
         0x0000:  0001 0800 0604 0002 5c9a d85c 6331 c0a8
         0x0010:  0028 001f 3380 0944 c0a8 0001
14:56:36.689750 IP 192.168.200.1 > 192.168.0.1: ICMP echo request, id 
512, seq 4608, length 40
         0x0000:  4500 003c 0196 0000 8001 efd7 c0a8 c801
         0x0010:  c0a8 0001 0800 395c 0200 1200 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:36.689774 IP 192.168.0.40 > 192.168.0.1: ICMP echo request, id 
512, seq 4608, length 40
         0x0000:  4500 003c 0196 0000 7f01 b8b1 c0a8 0028
         0x0010:  c0a8 0001 0800 395c 0200 1200 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:36.693330 IP 192.168.0.1 > 192.168.0.40: ICMP echo reply, id 512, 
seq 4608, length 40
         0x0000:  4500 003c cf50 0000 ff01 6af6 c0a8 0001
         0x0010:  c0a8 0028 0000 415c 0200 1200 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:42.189424 IP 192.168.200.1 > 192.168.0.1: ICMP echo request, id 
512, seq 4864, length 40
         0x0000:  4500 003c 0197 0000 8001 efd6 c0a8 c801
         0x0010:  c0a8 0001 0800 385c 0200 1300 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:42.189447 IP 192.168.0.40 > 192.168.0.1: ICMP echo request, id 
512, seq 4864, length 40
         0x0000:  4500 003c 0197 0000 7f01 b8b0 c0a8 0028
         0x0010:  c0a8 0001 0800 385c 0200 1300 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:42.193029 IP 192.168.0.1 > 192.168.0.40: ICMP echo reply, id 512, 
seq 4864, length 40
         0x0000:  4500 003c cf51 0000 ff01 6af5 c0a8 0001
         0x0010:  c0a8 0028 0000 405c 0200 1300 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:47.689414 IP 192.168.200.1 > 192.168.0.1: ICMP echo request, id 
512, seq 5120, length 40
         0x0000:  4500 003c 0198 0000 8001 efd5 c0a8 c801
         0x0010:  c0a8 0001 0800 375c 0200 1400 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:47.689439 IP 192.168.0.40 > 192.168.0.1: ICMP echo request, id 
512, seq 5120, length 40
         0x0000:  4500 003c 0198 0000 7f01 b8af c0a8 0028
         0x0010:  c0a8 0001 0800 375c 0200 1400 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869
14:56:47.693661 IP 192.168.0.1 > 192.168.0.40: ICMP echo reply, id 512, 
seq 5120, length 40
         0x0000:  4500 003c cf52 0000 ff01 6af4 c0a8 0001
         0x0010:  c0a8 0028 0000 3f5c 0200 1400 6162 6364
         0x0020:  6566 6768 696a 6b6c 6d6e 6f70 7172 7374
         0x0030:  7576 7761 6263 6465 6667 6869

Is this what you asked for?

Chris

^ permalink raw reply

* Re: [RFC PATCHv2 bridge 5/7] bridge: Add vlan support to static neighbors
From: Vlad Yasevich @ 2012-09-19 15:24 UTC (permalink / raw)
  To: John Fastabend; +Cc: netdev, shemminger
In-Reply-To: <5059E2BB.8060507@intel.com>

On 09/19/2012 11:20 AM, John Fastabend wrote:
> On 9/19/2012 5:42 AM, Vlad Yasevich wrote:
>> ---
>>   include/linux/neighbour.h |    2 +-
>>   net/bridge/br_fdb.c       |   12 ++++++------
>>   2 files changed, 7 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
>> index 275e5d6..044df8f 100644
>> --- a/include/linux/neighbour.h
>> +++ b/include/linux/neighbour.h
>> @@ -7,7 +7,7 @@
>>   struct ndmsg {
>>       __u8        ndm_family;
>>       __u8        ndm_pad1;
>> -    __u16        ndm_pad2;
>> +    __u16        ndm_vlan;
>
> But ndm_pad2 is also used in neighbour.c so you'll need to fix that up
> as well.
>
> net/core/neighbour.c: In function âneigh_fill_infoâ:
> net/core/neighbour.c:2152: error: âstruct ndmsgâ has no member named
> ândm_pad2â
> net/core/neighbour.c: In function âpneigh_fill_infoâ:
> net/core/neighbour.c:2203: error: âstruct ndmsgâ has no member named
> ândm_pad2â
> make[2]: *** [net/core/neighbour.o] Error 1
> make[1]: *** [net/core] Error 2
> make[1]: *** Waiting for unfinished jobs....
> make: *** [net] Error 2
> make: *** Waiting for unfinished jobs....


dough!!!  patches from wrong branch.  sorry about that.

-vlad

^ permalink raw reply

* Re: [RFC PATCHv2 bridge 5/7] bridge: Add vlan support to static neighbors
From: John Fastabend @ 2012-09-19 15:20 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev, shemminger
In-Reply-To: <1348058536-22607-6-git-send-email-vyasevic@redhat.com>

On 9/19/2012 5:42 AM, Vlad Yasevich wrote:
> ---
>   include/linux/neighbour.h |    2 +-
>   net/bridge/br_fdb.c       |   12 ++++++------
>   2 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
> index 275e5d6..044df8f 100644
> --- a/include/linux/neighbour.h
> +++ b/include/linux/neighbour.h
> @@ -7,7 +7,7 @@
>   struct ndmsg {
>   	__u8		ndm_family;
>   	__u8		ndm_pad1;
> -	__u16		ndm_pad2;
> +	__u16		ndm_vlan;

But ndm_pad2 is also used in neighbour.c so you'll need to fix that up
as well.

net/core/neighbour.c: In function ‘neigh_fill_info’:
net/core/neighbour.c:2152: error: ‘struct ndmsg’ has no member named 
‘ndm_pad2’
net/core/neighbour.c: In function ‘pneigh_fill_info’:
net/core/neighbour.c:2203: error: ‘struct ndmsg’ has no member named 
‘ndm_pad2’
make[2]: *** [net/core/neighbour.o] Error 1
make[1]: *** [net/core] Error 2
make[1]: *** Waiting for unfinished jobs....
make: *** [net] Error 2
make: *** Waiting for unfinished jobs....

^ permalink raw reply

* Re: [RFC] tcp: use order-3 pages in tcp_sendmsg()
From: Eric Dumazet @ 2012-09-19 15:14 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20120917.130732.1894375657044880827.davem@davemloft.net>

On Mon, 2012-09-17 at 13:07 -0400, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 17 Sep 2012 19:04:53 +0200
> 
> > On Mon, 2012-09-17 at 19:02 +0200, Eric Dumazet wrote:
> > 
> >> A driver already exports a dev->gso_max_size, dev->gso_max_segs, I guess
> >> it could export a dev->max_seg_order (default to 0)
> > 
> > Oh well, if we use a per thread order-3 page, a driver wont define an
> > order, but the max size of a segment (dev->max_seg_size).
> 
> Since you said that your audit showed that most can handle arbitrary
> segment sizes, it's better to default to infinity or similar.
> 
> Otherwise we'll have to annotate almost every single driver with a
> non-zero value, that's not an efficient way to handle this and
> deploy the higher performance quickly.

I did some tests and got no problem so far, even using splice() [ this
one was tricky because it only deals with order-0 pages at this moment ]

NIC tested : ixgbe, igb, bnx2x, tg3, mellanox mlx4

On loopback, performance of netperf goes from 31900 Mb/s to 38500 Mb/s,
that's a 20% increase.

^ permalink raw reply

* Re: [PATCH v2 7/9] net/macb: ethtool interface: add register dump feature
From: Ben Hutchings @ 2012-09-19 15:14 UTC (permalink / raw)
  To: Nicolas Ferre
  Cc: netdev, davem, havard, linux-arm-kernel, plagnioj,
	patrice.vilchez, linux-kernel
In-Reply-To: <20ebfb29f6f4a84d8ba20553e2d81cd456f438de.1348055112.git.nicolas.ferre@atmel.com>

On Wed, 2012-09-19 at 13:55 +0200, Nicolas Ferre wrote:
> Add macb_get_regs() ethtool function and its helper function:
> macb_get_regs_len().
> The version field is deduced from the IP revision which gives the
> "MACB or GEM" information. An additional version field is reserved.
> 
> Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
> Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
[...]

Please also send the register dump decoder for the ethtool utility once
this series is accepted.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Macvtap bug: contractor wanted
From: Richard Davies, Chris Webb @ 2012-09-19 15:11 UTC (permalink / raw)
  To: netdev, qemu-devel; +Cc: Jason Wang, Arnd Bergmann, Michael S. Tsirkin
In-Reply-To: <20120816153613.GA22326@redhat.com>

Hi. We run a cloud compute provider using qemu-kvm and macvtap and are keen
to find a paid contractor to fix a bug with unusably slow inbound networking
over macvtap.

We originally reported the bug in this thread (report copied below):

  http://marc.info/?t=134511098600002

We have also reproduced using only a Fedora 17 Live CD:

  https://bugzilla.redhat.com/show_bug.cgi?id=855640

This bug is a serious problem for us, since we have built a new version of our
product which suffers from it and did not realise in testing, only once we had
live production installs.

Many thanks to Michael Tsirkin for his initial help. However, we appreciate
that his time is limited and divided among many projects. Given the commercial
time pressure on us to fix this bug, we are keen to hire a contractor to start
work immediately.

If anyone knowledgeable in the area would be interested in being paid to work
on this, or if you know someone who might be, we would be delighted to hear
from you.

Cheers,

Chris and Richard.

P.S. The original report read as follows:

  I'm experiencing a problem with qemu + macvtap which I can reproduce on a
  variety of hardware, with kernels varying from 3.0.4 (the oldest I tried) to
  3.5.1 and with qemu[-kvm] versions 0.14.1, 1.0, and 1.1.

  Large data transfers over TCP into a guest from another machine on the
  network are very slow (often less than 100kB/s) whereas transfers outbound
  from the guest, between two guests on the same host, or between the guest
  and its host run at normal speeds (>= 50MB/s).

  The slow inbound data transfer speeds up substantially when a ping flood is
  aimed either at the host or the guest, or when the qemu process is straced.
  Presumably both of these are ways to wake up something that is otherwise
  sleeping too long?

  For example, I can run

    ip addr add 192.168.1.2/24 dev eth0
    ip link set eth0 up
    ip link add link eth0 name tap0 address 02:02:02:02:02:02 type macvtap mode bridge
    ip link set tap0 up
    qemu-kvm -hda debian.img -cpu host -m 512 -vnc :0 \
      -net nic,model=virtio,macaddr=02:02:02:02:02:02 \
      -net tap,fd=3 3<>/dev/tap$(< /sys/class/net/tap0/ifindex)

  on one physical host which is otherwise completely idle. From a second
  physical host on the same network, I then scp a large (say 50MB) file onto
  the new guest. On a gigabit LAN, speeds consistently drop to less than
  100kB/s as the transfer progresses, within a second of starting.

  The choice of virtio virtual nic in the above isn't significant: the same thing
  happens with e1000 or rtl8139. You can also replace the scp with a straight
  netcat and see the same effect.

  Doing the transfer in the other direction (i.e. copying a large file from the
  guest to an external host) achieves 50MB/s or faster as expected. Copying
  between two guests on the same host (i.e. taking advantage of the 'mode
  bridge') is also fast.

  If I create a macvlan device attached to eth0 and move the host IP address to
  that, I can communicate between the host itself and the guest because of the
  'mode bridge'. Again, this case is fast in both directions.

  Using a bridge and a standard tap interface, transfers in and out are fast
  too:

    ip tuntap add tap0 mode tap
    brctl addbr br0
    brctl addif br0 eth0
    brctl addif br0 tap0
    ip link set eth0 up
    ip link set tap0 up
    ip link set br0 up
    ip addr add 192.168.1.2/24 dev br0
    qemu-kvm -hda debian.img -cpu host -m 512 -vnc :0 \
      -net nic,model=virtio,macaddr=02:02:02:02:02:02 \
      -net tap,script=no,downscript=no,ifname=tap0

  As mentioned in the summary at the beginning of this report, when I strace a
  guest in the original configuration which is receiving data slowly, the data
  rate improves from less than 100kB/s to around 3.1MB/s. Similarly, if I ping
  flood either the guest or the host it is running on from another machine on
  the network, the transfer rate improves to around 1.1MB/s. This seems quite
  suggestive of a problem with delayed wake-up of the guest.

  Two reasonably up-to-date examples of machines I've reproduced this on are
  my laptop with an r8169 gigabit ethernet card, Debian qemu-kvm 1.0 and
  upstream 3.4.8 kernel whose .config and boot dmesg are at

    http://cdw.me.uk/tmp/laptop-config.txt
    http://cdw.me.uk/tmp/laptop-dmesg.txt

  and one of our large servers with an igb gigabit ethernet card, upstream
  qemu-kvm 1.1.1 and upstream 3.5.1 linux:

    http://cdw.me.uk/tmp/server-config.txt
    http://cdw.me.uk/tmp/server-dmesg.txt

  For completeness, I've put the Debian 6 test image I've been using for
  testing at

    http://cdw.me.uk/tmp/test-debian.img.xz

  though I've seen the same problem from a variety of guest operating systems.
  (In fact, I've not yet found any combination of host kernel, guest OS and
  hardware which doesn't show these symptoms, so it seems to be very easy to
  reproduce.)

We later found that

  -CONFIG_INTEL_IDLE=y
  +# CONFIG_INTEL_IDLE is not set

helped the problem on my laptop, but none of the obvious similar things made
any difference on AMD hardware.

The bug appears whether or not vhost-net is used, and irrespective of emulated
NIC in qemu, so is very likely to be a kernel issue rather than a qemu issue.

^ permalink raw reply

* Re: [PATCH 2/4] ipv6: unify conntrack reassembly expire code with standard one
From: Jesper Dangaard Brouer @ 2012-09-19 15:12 UTC (permalink / raw)
  To: Cong Wang
  Cc: netdev, Netfilter Developers, Herbert Xu, Michal Kubeček,
	David Miller, Hideaki YOSHIFUJI, Patrick McHardy,
	Pablo Neira Ayuso
In-Reply-To: <1348023011-16195-3-git-send-email-amwang@redhat.com>

On Wed, 19 Sep 2012, Cong Wang wrote:

[cut]
> With this patch applied, I can see ICMP Time Exceeded sent
> from the receiver when the sender sent out 3/4 fragmented
> IPv6 UPD packet.

Typo "UPD" -> "UDP"

If people want to redo the IPv6 UDP fragment tests, they can use my scapy 
script, and comment out sending the last fragment:
  https://github.com/netoptimizer/network-testing/blob/master/scapy/ipv6_fragment01.py

Another thing, could you please "mark"/put the version of the patch in the 
subject line, like:

  [PATCH V4 2/4] ipv6: ...

This makes it easier to follow which version of the patch people are
replying to.

With git send-email I think you have to do:

   git send-email --subject-prefix="PATCH V4"

And with stg (stacked git) I usually do:

   stg mail --version "V4" --to netdev ...

Cheers,
   Jesper Brouer

--
-------------------------------------------------------------------
MSc. Master of Computer Science
Dept. of Computer Science, University of Copenhagen
Author of http://www.adsl-optimizer.dk
-------------------------------------------------------------------

^ permalink raw reply

* Re: [PATCH v2 4/9] net/macb: remove macb_get_drvinfo()
From: Ben Hutchings @ 2012-09-19 15:10 UTC (permalink / raw)
  To: Nicolas Ferre
  Cc: netdev, davem, havard, linux-arm-kernel, plagnioj,
	patrice.vilchez, linux-kernel
In-Reply-To: <3fbd9b0eb1e255eccd14ad43044e146776baa963.1348055112.git.nicolas.ferre@atmel.com>

On Wed, 2012-09-19 at 13:55 +0200, Nicolas Ferre wrote:
> This function has little meaning so remove it altogether and
> let ethtool core fill in the fields automatically.
> 
> Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
> ---
>  drivers/net/ethernet/cadence/macb.c | 11 -----------
>  1 file changed, 11 deletions(-)
> 
> diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
> index 2948553..31f945c 100644
> --- a/drivers/net/ethernet/cadence/macb.c
> +++ b/drivers/net/ethernet/cadence/macb.c
> @@ -1217,20 +1217,9 @@ static int macb_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
>  	return phy_ethtool_sset(phydev, cmd);
>  }
>  
> -static void macb_get_drvinfo(struct net_device *dev,
> -			     struct ethtool_drvinfo *info)
> -{
> -	struct macb *bp = netdev_priv(dev);
> -
> -	strcpy(info->driver, bp->pdev->dev.driver->name);
> -	strcpy(info->version, "$Revision: 1.14 $");
> -	strcpy(info->bus_info, dev_name(&bp->pdev->dev));
> -}
> -
>  static const struct ethtool_ops macb_ethtool_ops = {
>  	.get_settings		= macb_get_settings,
>  	.set_settings		= macb_set_settings,
> -	.get_drvinfo		= macb_get_drvinfo,
>  	.get_link		= ethtool_op_get_link,
>  };
>  

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox