Netdev List
 help / color / mirror / Atom feed
* [RFC v3 PATCH 5/4 PATCH] virtio-net: send gratuitous packet when needed
From: Jason Wang @ 2011-10-27  8:57 UTC (permalink / raw)
  To: aliguori, mst, jan.kiszka, rusty, linux-kernel, blauwirbel,
	netdev, stefanha
  Cc: qemu-devel, kvm

As the hypervisor does not have knowledge of the guest network
configuration, it's better to ask the guest to send gratuitous packets
when needed.

This patch lets the virtio-net driver send gratuitous packets.

The guest checks VIRTIO_NET_S_ANNOUNCE during the config change
interrupt. When this bit is set, a work item is scheduled to send
gratuitous packets through NETDEV_NOTIFY_PEERS.

This feature is negotiated through bit VIRTIO_NET_F_GUEST_ANNOUNCE.

Changes from v2:
- Fix the race between unregister_dev() and workqueue

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c   |   32 ++++++++++++++++++++++++++++++--
 include/linux/virtio_net.h |    2 ++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b8225f3..19ee718 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -71,6 +71,9 @@ struct virtnet_info {
 	/* Work struct for refilling if we run low on memory. */
 	struct delayed_work refill;
 
+	/* Work struct for sending gratituous packet. */
+	struct work_struct announce;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
@@ -507,6 +510,13 @@ static void refill_work(struct work_struct *work)
 		schedule_delayed_work(&vi->refill, HZ/2);
 }
 
+static void announce_work(struct work_struct *work)
+{
+	struct virtnet_info *vi = container_of(work, struct virtnet_info,
+					       announce);
+	netif_notify_peers(vi->dev);
+}
+
 static int virtnet_poll(struct napi_struct *napi, int budget)
 {
 	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
@@ -923,11 +933,23 @@ static void virtnet_update_status(struct virtnet_info *vi)
 			      &v, sizeof(v));
 
 	/* Ignore unknown (future) status bits */
-	v &= VIRTIO_NET_S_LINK_UP;
+	v &= VIRTIO_NET_S_LINK_UP | VIRTIO_NET_S_ANNOUNCE;
 
 	if (vi->status == v)
 		return;
 
+	if (v & VIRTIO_NET_S_ANNOUNCE) {
+		v &= ~VIRTIO_NET_S_ANNOUNCE;
+		vi->vdev->config->set(vi->vdev,
+				      offsetof(struct virtio_net_config,
+					       status),
+				      &v, sizeof(v));
+
+		if ((v & VIRTIO_NET_S_LINK_UP) &&
+		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+			schedule_work(&vi->announce);
+	}
+
 	vi->status = v;
 
 	if (vi->status & VIRTIO_NET_S_LINK_UP) {
@@ -1016,6 +1038,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 		goto free;
 
 	INIT_DELAYED_WORK(&vi->refill, refill_work);
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+		INIT_WORK(&vi->announce, announce_work);
 	sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
 	sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
 
@@ -1075,6 +1099,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	return 0;
 
 unregister:
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+		cancel_work_sync(&vi->announce);
 	unregister_netdev(dev);
 	cancel_delayed_work_sync(&vi->refill);
 free_vqs:
@@ -1115,7 +1141,8 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
-
+	if(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ANNOUNCE))
+		cancel_work_sync(&vi->announce);
 	unregister_netdev(vi->dev);
 	cancel_delayed_work_sync(&vi->refill);
 
@@ -1144,6 +1171,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
 	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
+	VIRTIO_NET_F_GUEST_ANNOUNCE,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 970d5a2..44a38d6 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -49,8 +49,10 @@
 #define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
 #define VIRTIO_NET_F_CTRL_VLAN	19	/* Control channel VLAN filtering */
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can send gratituous packet */
 
 #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
+#define VIRTIO_NET_S_ANNOUNCE   2       /* Announcement is needed */
 
 struct virtio_net_config {
 	/* The config defining mac address (if VIRTIO_NET_F_MAC) */

^ permalink raw reply related

* [RFC v3 PATCH 4/4] virtio-net: compat guest announce support.
From: Jason Wang @ 2011-10-27  8:49 UTC (permalink / raw)
  To: aliguori, mst, jan.kiszka, rusty, qemu-devel, blauwirbel,
	stefanha
  Cc: netdev, kvm
In-Reply-To: <20111027084700.15020.24087.stgit@dhcp-8-146.nay.redhat.com>

Disable guest announce for compat machine types.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/pc_piix.c |   16 ++++++++++++++++
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 8c7f2b7..6ca50a6 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -340,6 +340,10 @@ static QEMUMachine pc_machine_v0_13 = {
             .driver   = "virtio-net-pci",
             .property = "event_idx",
             .value    = "off",
+        },{
+            .driver   = "virtio-net-pci",
+            .property = "guest_announce",
+            .value    = "off",
         },
         { /* end of list */ }
     },
@@ -383,6 +387,10 @@ static QEMUMachine pc_machine_v0_12 = {
             .driver   = "virtio-net-pci",
             .property = "event_idx",
             .value    = "off",
+        },{
+            .driver   = "virtio-net-pci",
+            .property = "guest_announce",
+            .value    = "off",
         },
         { /* end of list */ }
     }
@@ -434,6 +442,10 @@ static QEMUMachine pc_machine_v0_11 = {
             .driver   = "virtio-net-pci",
             .property = "event_idx",
             .value    = "off",
+        },{
+            .driver   = "virtio-net-pci",
+            .property = "guest_announce",
+            .value    = "off",
         },
         { /* end of list */ }
     }
@@ -497,6 +509,10 @@ static QEMUMachine pc_machine_v0_10 = {
             .driver   = "virtio-net-pci",
             .property = "event_idx",
             .value    = "off",
+        },{
+            .driver   = "virtio-net-pci",
+            .property = "guest_announce",
+            .value    = "off",
         },
         { /* end of list */ }
     },


^ permalink raw reply related

* [RFC v3 PATCH 3/4] virtio-net: notify guest to annouce itself
From: Jason Wang @ 2011-10-27  8:48 UTC (permalink / raw)
  To: aliguori, mst, jan.kiszka, rusty, qemu-devel, blauwirbel,
	stefanha
  Cc: netdev, kvm
In-Reply-To: <20111027084700.15020.24087.stgit@dhcp-8-146.nay.redhat.com>

It's hard to track all MAC addresses and their usage (vlan, bonding,
ipv6) in qemu in order to send proper gratuitous packets. The better
choice is to let the guest do it.

The patch introduces a new rw config status bit of virtio-net,
VIRTIO_NET_S_ANNOUNCE, which is used to notify the guest to announce
the presence of its link through a config update interrupt. When the
guest has done the announcement, it should clear that bit.

The feature is negotiated by bit VIRTIO_NET_F_GUEST_ANNOUNCE.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio-net.c |   18 +++++++++++++++++-
 hw/virtio-net.h |    3 +++
 2 files changed, 20 insertions(+), 1 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 8c2f460..5451eec 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -95,6 +95,8 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
         memcpy(n->mac, netcfg.mac, ETH_ALEN);
         qemu_format_nic_info_str(&n->nic->nc, n->mac);
     }
+
+    memcpy(&n->status, &netcfg.status, sizeof(n->status));
 }
 
 static bool virtio_net_started(VirtIONet *n, uint8_t status)
@@ -227,7 +229,7 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
 {
     VirtIONet *n = to_virtio_net(vdev);
 
-    features |= (1 << VIRTIO_NET_F_MAC);
+    features |= (1 << VIRTIO_NET_F_MAC | 1 << VIRTIO_NET_F_GUEST_ANNOUNCE);
 
     if (peer_has_vnet_hdr(n)) {
         tap_using_vnet_hdr(n->nic->nc.peer, 1);
@@ -983,6 +985,19 @@ static void virtio_net_cleanup(VLANClientState *nc)
     n->nic = NULL;
 }
 
+static int virtio_net_announce(VLANClientState *nc)
+{
+    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+
+    if (n->vdev.guest_features & (0x1 << VIRTIO_NET_F_GUEST_ANNOUNCE)) {
+        n->status |= VIRTIO_NET_S_ANNOUNCE;
+        virtio_notify_config(&n->vdev);
+        return 0;
+    }
+
+    return 1;
+}
+
 static NetClientInfo net_virtio_info = {
     .type = NET_CLIENT_TYPE_NIC,
     .size = sizeof(NICState),
@@ -990,6 +1005,7 @@ static NetClientInfo net_virtio_info = {
     .receive = virtio_net_receive,
         .cleanup = virtio_net_cleanup,
     .link_status_changed = virtio_net_set_link_status,
+    .announce = virtio_net_announce,
 };
 
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index 4468741..9f8cea7 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -44,8 +44,10 @@
 #define VIRTIO_NET_F_CTRL_RX    18      /* Control channel RX mode support */
 #define VIRTIO_NET_F_CTRL_VLAN  19      /* Control channel VLAN filtering */
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20   /* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can announce itself */
 
 #define VIRTIO_NET_S_LINK_UP    1       /* Link is up */
+#define VIRTIO_NET_S_ANNOUNCE   2       /* Announcement is needed */
 
 #define TX_TIMER_INTERVAL 150000 /* 150 us */
 
@@ -176,6 +178,7 @@ struct virtio_net_ctrl_mac {
         DEFINE_PROP_BIT("guest_tso6", _state, _field, VIRTIO_NET_F_GUEST_TSO6, true), \
         DEFINE_PROP_BIT("guest_ecn", _state, _field, VIRTIO_NET_F_GUEST_ECN, true), \
         DEFINE_PROP_BIT("guest_ufo", _state, _field, VIRTIO_NET_F_GUEST_UFO, true), \
+        DEFINE_PROP_BIT("guest_announce", _state, _field, VIRTIO_NET_F_GUEST_ANNOUNCE, true), \
         DEFINE_PROP_BIT("host_tso4", _state, _field, VIRTIO_NET_F_HOST_TSO4, true), \
         DEFINE_PROP_BIT("host_tso6", _state, _field, VIRTIO_NET_F_HOST_TSO6, true), \
         DEFINE_PROP_BIT("host_ecn", _state, _field, VIRTIO_NET_F_HOST_ECN, true), \


^ permalink raw reply related

* [RFC v3 PATCH 2/4] net: model specific announcing support
From: Jason Wang @ 2011-10-27  8:48 UTC (permalink / raw)
  To: aliguori, mst, jan.kiszka, rusty, qemu-devel, blauwirbel,
	stefanha
  Cc: netdev, kvm
In-Reply-To: <20111027084700.15020.24087.stgit@dhcp-8-146.nay.redhat.com>

This patch introduces a function pointer in NetClientInfo which is
called during self announcement to do the model specific announcing.

The first user would be virtio-net.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 net.h    |    2 ++
 savevm.c |    8 +++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net.h b/net.h
index 9f633f8..7654769 100644
--- a/net.h
+++ b/net.h
@@ -46,6 +46,7 @@ typedef ssize_t (NetReceive)(VLANClientState *, const uint8_t *, size_t);
 typedef ssize_t (NetReceiveIOV)(VLANClientState *, const struct iovec *, int);
 typedef void (NetCleanup) (VLANClientState *);
 typedef void (LinkStatusChanged)(VLANClientState *);
+typedef int (NetAnnounce)(VLANClientState *);
 
 typedef struct NetClientInfo {
     net_client_type type;
@@ -57,6 +58,7 @@ typedef struct NetClientInfo {
     NetCleanup *cleanup;
     LinkStatusChanged *link_status_changed;
     NetPoll *poll;
+    NetAnnounce *announce;
 } NetClientInfo;
 
 struct VLANClientState {
diff --git a/savevm.c b/savevm.c
index 73ee6e2..46389b2 100644
--- a/savevm.c
+++ b/savevm.c
@@ -122,10 +122,12 @@ static void qemu_announce_self_iter(NICState *nic, void *opaque)
 {
     uint8_t buf[60];
     int len;
+    NetAnnounce *func = nic->nc.info->announce;
 
-    len = announce_self_create(buf, nic->conf->macaddr.a);
-
-    qemu_send_packet_raw(&nic->nc, buf, len);
+    if (func == NULL || func(&nic->nc) != 0) {
+        len = announce_self_create(buf, nic->conf->macaddr.a);
+        qemu_send_packet_raw(&nic->nc, buf, len);
+    }
 }
 
 

^ permalink raw reply related

* [RFC v3 PATCH 1/4] announce self after vm start
From: Jason Wang @ 2011-10-27  8:48 UTC (permalink / raw)
  To: aliguori, mst, jan.kiszka, rusty, qemu-devel, blauwirbel,
	stefanha
  Cc: netdev, kvm
In-Reply-To: <20111027084700.15020.24087.stgit@dhcp-8-146.nay.redhat.com>

This patch moves qemu_announce_self() to vm_start() and adds a new
parameter to control whether sending gratuitous packets is needed.

This is because of the following reasons:

- Gratuitous packets are also needed when we resume a stopped vm or
  successfully load a state.
- The ability of sending gratuitous packets by the guest may change the
  state of the device, so we need to do it after the vm is started.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 gdbstub.c   |    2 +-
 migration.c |    4 ++--
 monitor.c   |    4 ++--
 savevm.c    |    2 +-
 sysemu.h    |    2 +-
 vl.c        |    7 +++++--
 6 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 4009058..5f6238e 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -371,7 +371,7 @@ static inline void gdb_continue(GDBState *s)
 #ifdef CONFIG_USER_ONLY
     s->running_state = 1;
 #else
-    vm_start();
+    vm_start(false);
 #endif
 }
 
diff --git a/migration.c b/migration.c
index bdca72e..8580fa7 100644
--- a/migration.c
+++ b/migration.c
@@ -90,7 +90,7 @@ void process_incoming_migration(QEMUFile *f)
     DPRINTF("successfully loaded vm state\n");
 
     if (autostart) {
-        vm_start();
+        vm_start(true);
     } else {
         runstate_set(RUN_STATE_PRELAUNCH);
     }
@@ -308,7 +308,7 @@ static void migrate_fd_put_ready(void *opaque)
         }
         if (s->state != MIG_STATE_COMPLETED) {
             if (old_vm_running) {
-                vm_start();
+                vm_start(false);
             }
         }
     }
diff --git a/monitor.c b/monitor.c
index ffda0fe..633e2de 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1252,7 +1252,7 @@ static int do_cont(Monitor *mon, const QDict *qdict, QObject **ret_data)
     bdrv_iterate(encrypted_bdrv_it, &context);
     /* only resume the vm if all keys are set and valid */
     if (!context.err) {
-        vm_start();
+        vm_start(true);
         return 0;
     } else {
         return -1;
@@ -2710,7 +2710,7 @@ static void do_loadvm(Monitor *mon, const QDict *qdict)
     vm_stop(RUN_STATE_RESTORE_VM);
 
     if (load_vmstate(name) == 0 && saved_vm_running) {
-        vm_start();
+        vm_start(true);
     }
 }
 
diff --git a/savevm.c b/savevm.c
index f01838f..73ee6e2 100644
--- a/savevm.c
+++ b/savevm.c
@@ -2077,7 +2077,7 @@ void do_savevm(Monitor *mon, const QDict *qdict)
 
  the_end:
     if (saved_vm_running)
-        vm_start();
+        vm_start(false);
 }
 
 int load_vmstate(const char *name)
diff --git a/sysemu.h b/sysemu.h
index 22cd720..686f1ec 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -34,7 +34,7 @@ void vm_state_notify(int running, RunState state);
 #define VMRESET_SILENT   false
 #define VMRESET_REPORT   true
 
-void vm_start(void);
+void vm_start(bool announce);
 void vm_stop(RunState state);
 void vm_stop_force_state(RunState state);
 
diff --git a/vl.c b/vl.c
index 1ddb17b..e216966 100644
--- a/vl.c
+++ b/vl.c
@@ -1253,7 +1253,7 @@ void vm_state_notify(int running, RunState state)
     }
 }
 
-void vm_start(void)
+void vm_start(bool announce)
 {
     if (!runstate_is_running()) {
         cpu_enable_ticks();
@@ -1261,6 +1261,9 @@ void vm_start(void)
         vm_state_notify(1, RUN_STATE_RUNNING);
         resume_all_vcpus();
         monitor_protocol_event(QEVENT_RESUME, NULL);
+        if (announce) {
+            qemu_announce_self();
+        }
     }
 }
 
@@ -3440,7 +3443,7 @@ int main(int argc, char **argv, char **envp)
             exit(ret);
         }
     } else if (autostart) {
-        vm_start();
+        vm_start(false);
     }
 
     os_setup_post();

^ permalink raw reply related

* [RFC v3 PATCH 0/4] Send gratuitous packets by guest
From: Jason Wang @ 2011-10-27  8:48 UTC (permalink / raw)
  To: aliguori, mst, jan.kiszka, rusty, qemu-devel, blauwirbel,
	stefanha
  Cc: netdev, kvm

We only track the primary mac address in qemu and send rarp packets
after migration to notify the switch to update its mac address table.
This may not work when the guest has a complicated network
configuration such as tagged vlan or ipv6; those connections may be
lost or stalled after migration.

One method to handle them is snooping the network traffic in qemu and
recording the use of macs, but this method would hurt performance and
is impossible for network backends such as vhost.

So the best method to address it is to let the guest, instead of qemu,
send the gratuitous packets. This series first adds a model specific
function which lets a nic model implement its own announce method,
and then implements a virtio-net specific function to let the guest
send the gratuitous packets.

Changes from v2:

- Conditionally send the notification interrupt to guest (only for
migration, cont, loadvm).
- Remove the unused patch of function export.
- Typos and other comments from Stefan Hajnoczi.
- Disable guest announce for compat machine types.

---

Jason Wang (4):
      announce self after vm start
      net: model specific announcing support
      virtio-net: notify guest to annouce itself
      virtio-net: compat guest announce support.


 gdbstub.c       |    2 +-
 hw/pc_piix.c    |   16 ++++++++++++++++
 hw/virtio-net.c |   18 +++++++++++++++++-
 hw/virtio-net.h |    3 +++
 migration.c     |    4 ++--
 monitor.c       |    4 ++--
 net.h           |    2 ++
 savevm.c        |   10 ++++++----
 sysemu.h        |    2 +-
 vl.c            |    7 +++++--
 10 files changed, 55 insertions(+), 13 deletions(-)

-- 
Jason Wang

^ permalink raw reply

* Re: drivers/net/ethernet/apple
From: Geert Uytterhoeven @ 2011-10-27  8:40 UTC (permalink / raw)
  To: jeffrey.t.kirsher
  Cc: Benjamin Herrenschmidt, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <1319701536.10258.74.camel@jtkirshe-mobl>

On Thu, Oct 27, 2011 at 09:45, Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:
> On Wed, 2011-10-26 at 22:16 -0700, Geert Uytterhoeven wrote:
>> On Tue, Oct 25, 2011 at 22:19, Geert Uytterhoeven
>> <geert@linux-m68k.org> wrote:
>> > drivers/net/ethernet/apple/mac89x0.c is a driver for the Crystal
>> Semiconductor
>> > (Now Cirrus Logic) CS89[02]0, so it belongs in
>> drivers/net/ethernet/cirrus,
>> > next to cs89x0.c.
>>
>> And on the first -next run since its inclusion:
>>
>> | drivers/net/ethernet/apple/mac89x0.c:107:20: error: cs89x0.h: No
>> such file or directory
>>
>> it needs the (shared) header file which is in
>> drivers/net/ethernet/cirrus/.
>>
>> http://kisskb.ellerman.id.au/kisskb/buildresult/4835488/
>
> Ah, yeah, now it is coming back to me why I placed cs89x0.[ch] in the
> drivers/net/ethernet/apple/ because it was grouped with the "common"
> drivers.  The intent was to group drivers that use common code together.
>
> I can put together a patch to fix this if you are not able to.

I will create a patch to move mac89x0 to .../cirrus, when I get to compiling a
Mac kernel myself.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: drivers/net/ethernet/apple
From: Benjamin Herrenschmidt @ 2011-10-27  7:46 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: Jeff Kirsher, netdev, linux-kernel
In-Reply-To: <CAMuHMdXxqKoPNNo9a+VK-Yzu0Qp-8zY2+OhHL6pDPF=dsnNK5Q@mail.gmail.com>

On Tue, 2011-10-25 at 22:19 +0200, Geert Uytterhoeven wrote:
> Hi Jeff,
> 
> drivers/net/ethernet/apple/mac89x0.c is a driver for the Crystal Semiconductor
> (Now Cirrus Logic) CS89[02]0, so it belongs in drivers/net/ethernet/cirrus,
> next to cs89x0.c.
> 
> And according to drivers/net/ethernet/apple/mace.h, "mace" is the
> "Am79C940 MACE (Medium Access Control for Ethernet)", so mace and
> macmace should be in drivers/net/ethernet/amd/.

The latter is hard to tell for sure; it's coupled with an Apple DBDMA
chip and wired in odd ways, so ...

Ben.

> Gr{oetje,eeting}s,
> 
>                         Geert
> 
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
>                                 -- Linus Torvalds
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply

* Re: drivers/net/ethernet/apple
From: Jeff Kirsher @ 2011-10-27  7:45 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Benjamin Herrenschmidt, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <CAMuHMdUW5PjPU7qNOPA16CbxHJs-bL4r+aFcEGV95SDRBotXRg@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 947 bytes --]

On Wed, 2011-10-26 at 22:16 -0700, Geert Uytterhoeven wrote:
> Hi Jeff,
> 
> On Tue, Oct 25, 2011 at 22:19, Geert Uytterhoeven
> <geert@linux-m68k.org> wrote:
> > drivers/net/ethernet/apple/mac89x0.c is a driver for the Crystal
> Semiconductor
> > (Now Cirrus Logic) CS89[02]0, so it belongs in
> drivers/net/ethernet/cirrus,
> > next to cs89x0.c.
> 
> And on the first -next run since its inclusion:
> 
> | drivers/net/ethernet/apple/mac89x0.c:107:20: error: cs89x0.h: No
> such file or directory
> 
> it needs the (shared) header file which is in
> drivers/net/ethernet/cirrus/.
> 
> http://kisskb.ellerman.id.au/kisskb/buildresult/4835488/ 

Ah, yeah, now it is coming back to me why I placed cs89x0.[ch] in the
drivers/net/ethernet/apple/ because it was grouped with the "common"
drivers.  The intent was to group drivers that use common code together.

I can put together a patch to fix this if you are not able to.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* RE: [PATCH] IPv6: NLM_F_* flag support for route creation/changing when using netlink.
From: Vaittinen, Matti (EXT-Other - FI/Oulu) @ 2011-10-27  7:20 UTC (permalink / raw)
  To: ext Stephen Hemminger, David Miller; +Cc: netdev
In-Reply-To: <9b43e0ef-21cf-4062-b998-849e9e8d6b56@tahiti.vyatta.com>

 


> -----Original Message-----
> From: ext Stephen Hemminger [mailto:stephen.hemminger@vyatta.com] 
> Sent: Thursday, October 27, 2011 10:12 AM
> To: David Miller
> Cc: netdev@vger.kernel.org; Vaittinen, Matti (EXT-Other - FI/Oulu)
> Subject: Re: [PATCH] IPv6: NLM_F_* flag support for route
creation/changing when using netlink.
> 
> 
> > From: "Vaittinen, Matti (EXT-Other - FI/Oulu)"
> > <matti.vaittinen.ext@nsn.com>
> > Date: Thu, 27 Oct 2011 09:26:05 +0300
> > 
> > > 
> > > Will requiring NLM_F_CREATE break lots of existing userspace 
> > > software?
> > 
> > I can almost guarenetee that since we haven't been requiring this,
it 
> > will break almost everything.
> 
> Why not just make it a kernel warning for several releases.
> 
> Just checked, and iproute and quagga will have no problem since they
both already pass the CREATE flag.

To me, a kernel warning sounds like a reasonable approach. I will change
the patch to not drop the request with an error, but to issue a warning
instead. I'll also try using a better email client next time.

Furthermore I believe that most tools used for both IPv4 and IPv6
routing (like iproute) do add the CREATE flag because IPv4 route
creation does require it.

^ permalink raw reply

* Re: [PATCH] IPv6: NLM_F_* flag support for route creation/changing when using netlink.
From: Stephen Hemminger @ 2011-10-27  7:12 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, matti vaittinen ext
In-Reply-To: <20111027.030658.1922451477684539766.davem@davemloft.net>


> From: "Vaittinen, Matti (EXT-Other - FI/Oulu)"
> <matti.vaittinen.ext@nsn.com>
> Date: Thu, 27 Oct 2011 09:26:05 +0300
> 
> > 
> > Will requiring NLM_F_CREATE break lots of existing userspace
> > software?
> 
> I can almost guarenetee that since we haven't been requiring this,
> it will break almost everything.

Why not just make it a kernel warning for several releases.

Just checked, and iproute and quagga will have no problem since
they both already pass the CREATE flag.

^ permalink raw reply

* Re: [PATCH] IPv6: NLM_F_* flag support for route creation/changing when using netlink.
From: David Miller @ 2011-10-27  7:06 UTC (permalink / raw)
  To: matti.vaittinen.ext; +Cc: netdev
In-Reply-To: <82C9FC7ED59434458AD4E09AFF2DE230BF099B@FIESEXC006.nsn-intra.net>

From: "Vaittinen, Matti (EXT-Other - FI/Oulu)" <matti.vaittinen.ext@nsn.com>
Date: Thu, 27 Oct 2011 09:26:05 +0300

> 
> Will requiring NLM_F_CREATE break lots of existing userspace software?

I can almost guarantee that since we haven't been requiring this,
it will break almost everything.

> --- linux-3.1-rc4.orig/net/ipv6/ip6_fib.c	2011-10-26
> 13:15:17.000000000 +0300

Your patch is also severely mangled by your email client and is
thus unusable for us.

Please read Documentation/email-clients.txt before submitting any
more patches.

Thank you.

^ permalink raw reply

* adding nat awareness in routing daemons?
From: David Täht @ 2011-10-27  7:04 UTC (permalink / raw)
  To: netdev, bloat-devel

[-- Attachment #1: Type: text/plain, Size: 1084 bytes --]

after shooting myself in the foot on this several times in the past 
couple months, I thought I'd ask...

Is there a way to determine generically (in linux at least) if outgoing 
addresses on an interface are being NATTed or not? via netlink? I can 
come up with a way to do this via parsing iptables's nat table but 
that's kind of ugly.

The scenario I have is two (or more) routers with their external 
interfaces connected to a shared ethernet/cable segment. Both do NAT on 
their external interfaces for ipv4 (but don't do nat, at least 
currently, for ipv6). Their private networks are usually connected 
together via various means (mesh mostly), which can fail, and it would 
be nice to be able to add the external connectivity as a fallback 
without having to worry about NAT.

e.g. something like

if (nat_status(&this_interface)) {
         if(this_interface & IPV4_NO_NAT) send_route4_calculations();
         if(this_interface & IPV6_NO_NAT) send_route6_calculations();
}

Where I did myself in on this was in the lab

http://io.lab.bufferbloat.net:8080/


-- 
Dave Täht


[-- Attachment #2: dave_taht.vcf --]
[-- Type: text/x-vcard, Size: 204 bytes --]

begin:vcard
fn;quoted-printable:Dave T=C3=A4ht
n;quoted-printable:T=C3=A4ht;Dave
email;internet:dave.taht@gmail.com
tel;home:1-239-829-5608
tel;cell:0638645374
x-mozilla-html:FALSE
version:2.1
end:vcard


^ permalink raw reply

* Re: [PATCH v2 1/4] SUNRPC: rpcbind clients internals virtualization
From: Bryan Schumaker @ 2011-10-27  6:43 UTC (permalink / raw)
  To: Stanislav Kinsbursky
  Cc: Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA, xemul-bzQdu9zFT3WakBO8gow8eQ,
	neilb-l3A5Bk7waGM, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	bfields-uC3wQj2KruNg9hUCZPvPmw, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	devel-GEFAQzZX7r8dnm+yROfE0A
In-Reply-To: <20111025135749.5286.57091.stgit-bi+AKbBUZKagILUCTcTcHdKyNwTtLsGr@public.gmane.org>

On 10/25/2011 10:57 AM, Stanislav Kinsbursky wrote:
> This patch moves static rpcbind internals to sunrpc part of network namespace

Rather than saying that this patch moves the static internals, could you say that it creates the new network namespace internals?  The next patch switches things over, so it's not really correct to say that this one moves everything.

Alternatively, could this patch be merged with the next one so that you create and switch over to the new internals all at once?

- Bryan

> context. This will allow to create rcpbind clients per network namespace.
> 
> Signed-off-by: Stanislav Kinsbursky <skinsbursky-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
> 
> ---
>  net/sunrpc/netns.h |    5 +++++
>  1 files changed, 5 insertions(+), 0 deletions(-)
> 
> diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
> index d013bf2..83eede3 100644
> --- a/net/sunrpc/netns.h
> +++ b/net/sunrpc/netns.h
> @@ -9,6 +9,11 @@ struct cache_detail;
>  struct sunrpc_net {
>  	struct proc_dir_entry *proc_net_rpc;
>  	struct cache_detail *ip_map_cache;
> +
> +	struct rpc_clnt *rpcb_local_clnt;
> +	struct rpc_clnt *rpcb_local_clnt4;
> +	spinlock_t rpcb_clnt_lock;
> +	unsigned int rpcb_users;
>  };
>  
>  extern int sunrpc_net_id;
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH] IPv6: NLM_F_* flag support for route creation/changing when using netlink.
From: Vaittinen, Matti (EXT-Other - FI/Oulu) @ 2011-10-27  6:26 UTC (permalink / raw)
  To: davem; +Cc: netdev


Hi!

This patch enables checks for NLM_F_CREATE, NLM_F_REPLACE and NLM_F_EXCL
flags for IPv6 route creation. Checks are performed if netlink header in
info structure is non NULL. Patch is created against Linux 3.1.0-rc4
(Downloaded from kernel.org).

In a nutshell:
NLM_F_CREATE flag is required if new IPv6 route is being created.
If route with same key and metric exists, the route will be changed if
NLM_F_REPLACE flag is given. Else -EEXIST is returned.
Either NLM_F_CREATE or NLM_F_REPLACE must be specified in RTM_NEWROUTE
messages.


Thing to consider:
Will requiring NLM_F_CREATE break lots of existing userspace software?
Anyways, I believe this is justified. Especially in cases where user
wants to change route if it exists, but not create new one if no route
exists. And anyways, creating new routes when NLM_F_CREATE is not
specified is unexpected.



Signed-off-by: Matti Vaittinen <Mazziesaccount@gmail.com>
---
diff -uNr linux-3.1-rc4.orig/net/ipv6/ip6_fib.c
linux-3.1-rc4.new/net/ipv6/ip6_fib.c
--- linux-3.1-rc4.orig/net/ipv6/ip6_fib.c	2011-10-26
13:15:17.000000000 +0300
+++ linux-3.1-rc4.new/net/ipv6/ip6_fib.c	2011-10-26
14:03:25.000000000 +0300
@@ -39,6 +39,7 @@
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
 
+#define RT6_CANT_CREATE ((int)-1)
 #define RT6_DEBUG 2
 
 #if RT6_DEBUG >= 3
@@ -429,7 +430,7 @@
 
 static struct fib6_node * fib6_add_1(struct fib6_node *root, void
*addr,
 				     int addrlen, int plen,
-				     int offset)
+				     int offset, int allow_create)
 {
 	struct fib6_node *fn, *in, *ln;
 	struct fib6_node *pn = NULL;
@@ -451,8 +452,11 @@
 		 *	Prefix match
 		 */
 		if (plen < fn->fn_bit ||
-		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
+		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
+			if (!allow_create)
+				return (struct fib6_node
*)RT6_CANT_CREATE;
 			goto insert_above;
+		}
 
 		/*
 		 *	Exact match ?
@@ -485,6 +489,8 @@
 	 *	We walked to the bottom of tree.
 	 *	Create new leaf node without children.
 	 */
+	if (!allow_create)
+		return (struct fib6_node *)RT6_CANT_CREATE;
 
 	ln = node_alloc();
 
@@ -618,6 +624,12 @@
 {
 	struct rt6_info *iter = NULL;
 	struct rt6_info **ins;
+	int replace = (NULL != info &&
+	    NULL != info->nlh &&
+	    (info->nlh->nlmsg_flags&NLM_F_REPLACE));
+	int add = ((NULL == info || NULL == info->nlh) ||
+	    (info->nlh->nlmsg_flags&NLM_F_CREATE));
+	int found = 0;
 
 	ins = &fn->leaf;
 
@@ -630,6 +642,13 @@
 			/*
 			 *	Same priority level
 			 */
+			if (NULL != info->nlh &&
+			    (info->nlh->nlmsg_flags&NLM_F_EXCL))
+				return -EEXIST;
+			if (replace) {
+				found++;
+				break;
+			}
 
 			if (iter->rt6i_dev == rt->rt6i_dev &&
 			    iter->rt6i_idev == rt->rt6i_idev &&
@@ -659,19 +678,37 @@
 	/*
 	 *	insert node
 	 */
+	if (!replace) {
+		if (!add)
+			return -EINVAL;
+		rt->dst.rt6_next = iter;
+		*ins = rt;
+		rt->rt6i_node = fn;
+		atomic_inc(&rt->rt6i_ref);
+		inet6_rt_notify(RTM_NEWROUTE, rt, info);
+		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
+
+		if ((fn->fn_flags & RTN_RTINFO) == 0) {
+			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+			fn->fn_flags |= RTN_RTINFO;
+		}
 
-	rt->dst.rt6_next = iter;
-	*ins = rt;
-	rt->rt6i_node = fn;
-	atomic_inc(&rt->rt6i_ref);
-	inet6_rt_notify(RTM_NEWROUTE, rt, info);
-	info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
-
-	if ((fn->fn_flags & RTN_RTINFO) == 0) {
-		info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
-		fn->fn_flags |= RTN_RTINFO;
+	} else {
+		if (!found)
+			return -ENOENT;
+		*ins = rt;
+		rt->rt6i_node = fn;
+		rt->dst.rt6_next = iter->dst.rt6_next;
+		atomic_inc(&rt->rt6i_ref);
+		inet6_rt_notify(RTM_NEWROUTE, rt, info);
+		rt6_release(iter);
+		if ((fn->fn_flags & RTN_RTINFO) == 0) {
+			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+			fn->fn_flags |= RTN_RTINFO;
+		}
 	}
 
+
 	return 0;
 }
 
@@ -701,9 +738,31 @@
 	struct fib6_node *fn, *pn = NULL;
 	int err = -ENOMEM;
 
+	int allow_create = 1;
+	int allow_replace = 1;
+	if (NULL != info &&
+	    NULL != info->nlh &&
+	    !(info->nlh->nlmsg_flags&NLM_F_REPLACE)) {
+		allow_replace = 0;
+	}
+	if (NULL != info &&
+	    NULL != info->nlh &&
+	    !(info->nlh->nlmsg_flags&NLM_F_CREATE)) {
+			allow_create = 0;
+	}
+	if (!(allow_replace || allow_create)) {
+		err = -EINVAL;
+		fn = NULL;
+		goto out;
+	}
 	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct
in6_addr),
-			rt->rt6i_dst.plen, offsetof(struct rt6_info,
rt6i_dst));
+		    rt->rt6i_dst.plen, offsetof(struct rt6_info,
rt6i_dst),
+		    allow_create);
 
+	if (RT6_CANT_CREATE == (int)fn) {
+		err = -EINVAL;
+		fn = NULL;
+	}
 	if (fn == NULL)
 		goto out;
 
@@ -716,6 +775,11 @@
 		if (fn->subtree == NULL) {
 			struct fib6_node *sfn;
 
+			if (!allow_create) {
+				err = -EINVAL;
+				fn = NULL;
+				goto out;
+			}
 			/*
 			 * Create subtree.
 			 *
@@ -740,7 +804,7 @@
 
 			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
 					sizeof(struct in6_addr),
rt->rt6i_src.plen,
-					offsetof(struct rt6_info,
rt6i_src));
+					offsetof(struct rt6_info,
rt6i_src), 1);
 
 			if (sn == NULL) {
 				/* If it is failed, discard just
allocated
@@ -757,8 +821,13 @@
 		} else {
 			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
 					sizeof(struct in6_addr),
rt->rt6i_src.plen,
-					offsetof(struct rt6_info,
rt6i_src));
+					offsetof(struct rt6_info,
rt6i_src),
+					allow_create);
 
+			if (RT6_CANT_CREATE == (int)sn) {
+				err = -EINVAL;
+				sn = NULL;
+			}
 			if (sn == NULL)
 				goto st_failure;
 		}
diff -uNr linux-3.1-rc4.orig/net/ipv6/route.c
linux-3.1-rc4.new/net/ipv6/route.c
--- linux-3.1-rc4.orig/net/ipv6/route.c	2011-10-26 13:15:17.000000000
+0300
+++ linux-3.1-rc4.new/net/ipv6/route.c	2011-10-26 14:04:29.000000000
+0300
@@ -1223,9 +1223,15 @@
 	if (cfg->fc_metric == 0)
 		cfg->fc_metric = IP6_RT_PRIO_USER;
 
-	table = fib6_new_table(net, cfg->fc_table);
+	if (NULL != cfg->fc_nlinfo.nlh &&
+	    !(cfg->fc_nlinfo.nlh->nlmsg_flags&NLM_F_CREATE)) {
+		err = -EINVAL;
+		table = fib6_get_table(net, cfg->fc_table);
+	} else {
+		err = -ENOBUFS;
+		table = fib6_new_table(net, cfg->fc_table);
+	}
 	if (table == NULL) {
-		err = -ENOBUFS;
 		goto out;
 	}
 








--
- Matti Vaittinen


Theory:
Theoretical approach means that everything is well known, but still
nothing works.
Practice:
Practical approach means that everything works but no one knows why.

Thank God we have theory and practice balanced here. Nothing works, and
no one knows why...

^ permalink raw reply

* [PATCH 3/3] stmmac: update normal descriptor structure (v2)
From: Giuseppe CAVALLARO @ 2011-10-27  5:43 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro, Kelvin Cheung
In-Reply-To: <1319694189-25223-1-git-send-email-peppe.cavallaro@st.com>

This patch updates the normal descriptor structure
to work fine on new GMAC Synopsys chips.

Normal descriptors were designed on the old MAC10/100
databook 1.91 where some bits were reserved: for example
the tx checksum insertion and rx checksum offload.

The patch maintains backward compatibility with old
MAC devices (tested on STx7109 MAC10/100) and adds new
fields that new GMAC devices can actually use.

For example, STx7109 (MAC10/100) will pass from the platform
  tx_coe = 0, enh_desc = 0, has_gmac = 0.
A platform like Loongson1B (GMAC) will pass:
  tx_coe = 1, enh_desc = 0, has_gmac = 1.

Thanks to Kelvin, he enhanced the normal descriptors for
GMAC (on MIPS Loongson1B platform).

Signed-off-by: Kelvin Cheung <keguang.zhang@gmail.com>
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |    8 ++--
 drivers/net/ethernet/stmicro/stmmac/descs.h        |   31 +++++++++-------
 drivers/net/ethernet/stmicro/stmmac/norm_desc.c    |   38 +++++++++++---------
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   |    8 ++--
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |    6 ++-
 5 files changed, 51 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 9100c10..2cc1192 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -49,7 +49,7 @@ struct stmmac_extra_stats {
 	unsigned long tx_underflow ____cacheline_aligned;
 	unsigned long tx_carrier;
 	unsigned long tx_losscarrier;
-	unsigned long tx_heartbeat;
+	unsigned long vlan_tag;
 	unsigned long tx_deferred;
 	unsigned long tx_vlan;
 	unsigned long tx_jabber;
@@ -58,9 +58,9 @@ struct stmmac_extra_stats {
 	unsigned long tx_ip_header_error;
 	/* Receive errors */
 	unsigned long rx_desc;
-	unsigned long rx_partial;
-	unsigned long rx_runt;
-	unsigned long rx_toolong;
+	unsigned long sa_filter_fail;
+	unsigned long overflow_error;
+	unsigned long ipc_csum_error;
 	unsigned long rx_collision;
 	unsigned long rx_crc;
 	unsigned long rx_length;
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h
index 63a03e2..9820ec8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs.h
@@ -25,33 +25,34 @@ struct dma_desc {
 	union {
 		struct {
 			/* RDES0 */
-			u32 reserved1:1;
+			u32 payload_csum_error:1;
 			u32 crc_error:1;
 			u32 dribbling:1;
 			u32 mii_error:1;
 			u32 receive_watchdog:1;
 			u32 frame_type:1;
 			u32 collision:1;
-			u32 frame_too_long:1;
+			u32 ipc_csum_error:1;
 			u32 last_descriptor:1;
 			u32 first_descriptor:1;
-			u32 multicast_frame:1;
-			u32 run_frame:1;
+			u32 vlan_tag:1;
+			u32 overflow_error:1;
 			u32 length_error:1;
-			u32 partial_frame_error:1;
+			u32 sa_filter_fail:1;
 			u32 descriptor_error:1;
 			u32 error_summary:1;
 			u32 frame_length:14;
-			u32 filtering_fail:1;
+			u32 da_filter_fail:1;
 			u32 own:1;
 			/* RDES1 */
 			u32 buffer1_size:11;
 			u32 buffer2_size:11;
-			u32 reserved2:2;
+			u32 reserved1:2;
 			u32 second_address_chained:1;
 			u32 end_ring:1;
-			u32 reserved3:5;
+			u32 reserved2:5;
 			u32 disable_ic:1;
+
 		} rx;
 		struct {
 			/* RDES0 */
@@ -91,24 +92,28 @@ struct dma_desc {
 			u32 underflow_error:1;
 			u32 excessive_deferral:1;
 			u32 collision_count:4;
-			u32 heartbeat_fail:1;
+			u32 vlan_frame:1;
 			u32 excessive_collisions:1;
 			u32 late_collision:1;
 			u32 no_carrier:1;
 			u32 loss_carrier:1;
-			u32 reserved1:3;
+			u32 payload_error:1;
+			u32 frame_flushed:1;
+			u32 jabber_timeout:1;
 			u32 error_summary:1;
-			u32 reserved2:15;
+			u32 ip_header_error:1;
+			u32 time_stamp_status:1;
+			u32 reserved1:13;
 			u32 own:1;
 			/* TDES1 */
 			u32 buffer1_size:11;
 			u32 buffer2_size:11;
-			u32 reserved3:1;
+			u32 time_stamp_enable:1;
 			u32 disable_padding:1;
 			u32 second_address_chained:1;
 			u32 end_ring:1;
 			u32 crc_disable:1;
-			u32 reserved4:2;
+			u32 checksum_insertion:2;
 			u32 first_segment:1;
 			u32 last_segment:1;
 			u32 interrupt:1;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index f7e8ba7..fda5d2b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -50,11 +50,12 @@ static int ndesc_get_tx_status(void *data, struct stmmac_extra_stats *x,
 			stats->collisions += p->des01.tx.collision_count;
 		ret = -1;
 	}
-	if (unlikely(p->des01.tx.heartbeat_fail)) {
-		x->tx_heartbeat++;
-		stats->tx_heartbeat_errors++;
-		ret = -1;
+
+	if (p->des01.etx.vlan_frame) {
+		CHIP_DBG(KERN_INFO "GMAC TX status: VLAN frame\n");
+		x->tx_vlan++;
 	}
+
 	if (unlikely(p->des01.tx.deferred))
 		x->tx_deferred++;
 
@@ -68,12 +69,12 @@ static int ndesc_get_tx_len(struct dma_desc *p)
 
 /* This function verifies if each incoming frame has some errors
  * and, if required, updates the multicast statistics.
- * In case of success, it returns csum_none because the device
- * is not able to compute the csum in HW. */
+ * In case of success, it returns good_frame because the GMAC device
+ * is supposed to be able to compute the csum in HW. */
 static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 			       struct dma_desc *p)
 {
-	int ret = csum_none;
+	int ret = good_frame;
 	struct net_device_stats *stats = (struct net_device_stats *)data;
 
 	if (unlikely(p->des01.rx.last_descriptor == 0)) {
@@ -86,12 +87,12 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 	if (unlikely(p->des01.rx.error_summary)) {
 		if (unlikely(p->des01.rx.descriptor_error))
 			x->rx_desc++;
-		if (unlikely(p->des01.rx.partial_frame_error))
-			x->rx_partial++;
-		if (unlikely(p->des01.rx.run_frame))
-			x->rx_runt++;
-		if (unlikely(p->des01.rx.frame_too_long))
-			x->rx_toolong++;
+		if (unlikely(p->des01.rx.sa_filter_fail))
+			x->sa_filter_fail++;
+		if (unlikely(p->des01.rx.overflow_error))
+			x->overflow_error++;
+		if (unlikely(p->des01.rx.ipc_csum_error))
+			x->ipc_csum_error++;
 		if (unlikely(p->des01.rx.collision)) {
 			x->rx_collision++;
 			stats->collisions++;
@@ -113,10 +114,10 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 		x->rx_mii++;
 		ret = discard_frame;
 	}
-	if (p->des01.rx.multicast_frame) {
-		x->rx_multicast++;
-		stats->multicast++;
-	}
+#ifdef STMMAC_VLAN_TAG_USED
+	if (p->des01.rx.vlan_tag)
+		x->vlan_tag++;
+#endif
 	return ret;
 }
 
@@ -184,6 +185,9 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 {
 	p->des01.tx.first_segment = is_fs;
 	norm_set_tx_desc_len(p, len);
+
+	if (likely(csum_flag))
+		p->des01.tx.checksum_insertion = cic_full;
 }
 
 static void ndesc_clear_tx_ic(struct dma_desc *p)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 406404f..e8eff09 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -50,7 +50,7 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(tx_underflow),
 	STMMAC_STAT(tx_carrier),
 	STMMAC_STAT(tx_losscarrier),
-	STMMAC_STAT(tx_heartbeat),
+	STMMAC_STAT(vlan_tag),
 	STMMAC_STAT(tx_deferred),
 	STMMAC_STAT(tx_vlan),
 	STMMAC_STAT(rx_vlan),
@@ -59,9 +59,9 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(tx_payload_error),
 	STMMAC_STAT(tx_ip_header_error),
 	STMMAC_STAT(rx_desc),
-	STMMAC_STAT(rx_partial),
-	STMMAC_STAT(rx_runt),
-	STMMAC_STAT(rx_toolong),
+	STMMAC_STAT(sa_filter_fail),
+	STMMAC_STAT(overflow_error),
+	STMMAC_STAT(ipc_csum_error),
 	STMMAC_STAT(rx_collision),
 	STMMAC_STAT(rx_crc),
 	STMMAC_STAT(rx_length),
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f77eaa6..451aa60 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -813,6 +813,7 @@ static u32 stmmac_get_synopsys_id(struct stmmac_priv *priv)
 static int stmmac_get_hw_features(struct stmmac_priv *priv)
 {
 	u32 hw_cap = 0;
+
 	if (priv->hw->dma->get_hw_feature) {
 		hw_cap = priv->hw->dma->get_hw_feature(priv->ioaddr);
 
@@ -938,6 +939,7 @@ static int stmmac_open(struct net_device *dev)
 
 	stmmac_get_hw_features(priv);
 
+	priv->rx_coe = priv->hw->mac->rx_coe(priv->ioaddr);
 	if (priv->rx_coe)
 		pr_info("stmmac: Rx Checksum Offload Engine supported\n");
 	if (priv->plat->tx_coe)
@@ -1275,8 +1277,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 #endif
 			skb->protocol = eth_type_trans(skb, priv->dev);
 
-			if (unlikely(status == csum_none)) {
-				/* always for the old mac 10/100 */
+			if (unlikely(!priv->rx_coe)) {
+				/* No RX COE for old mac10/100 devices */
 				skb_checksum_none_assert(skb);
 				netif_receive_skb(skb);
 			} else {
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH 2/3] stmmac: fix NULL pointer dereference in capabilities fixup (v2)
From: Giuseppe CAVALLARO @ 2011-10-27  5:43 UTC (permalink / raw)
  To: netdev; +Cc: Angus Clark
In-Reply-To: <1319694189-25223-1-git-send-email-peppe.cavallaro@st.com>

From: Angus Clark <angus.clark@st.com>

Signed-off-by: Angus Clark <angus.clark@st.com>
Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index fcdd5a2..f77eaa6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -325,7 +325,7 @@ static int stmmac_init_phy(struct net_device *dev)
 	    (interface == PHY_INTERFACE_MODE_RMII))) {
 		phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
 				      SUPPORTED_Asym_Pause);
-		priv->phydev->advertising = priv->phydev->supported;
+		phydev->advertising = phydev->supported;
 	}
 
 	/*
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH 1/3] stmmac: fix a bug while checking the HW cap reg (v2)
From: Giuseppe CAVALLARO @ 2011-10-27  5:43 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1319637339-14866-1-git-send-email-peppe.cavallaro@st.com>

The patch fixes a bug while checking the HW cap reg
on old MAC10/100 where this feature is not available.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index aeaa15b..fcdd5a2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -812,9 +812,10 @@ static u32 stmmac_get_synopsys_id(struct stmmac_priv *priv)
  */
 static int stmmac_get_hw_features(struct stmmac_priv *priv)
 {
-	u32 hw_cap = priv->hw->dma->get_hw_feature(priv->ioaddr);
+	u32 hw_cap = 0;
+	if (priv->hw->dma->get_hw_feature) {
+		hw_cap = priv->hw->dma->get_hw_feature(priv->ioaddr);
 
-	if (likely(hw_cap)) {
 		priv->dma_cap.mbps_10_100 = (hw_cap & DMA_HW_FEAT_MIISEL);
 		priv->dma_cap.mbps_1000 = (hw_cap & DMA_HW_FEAT_GMIISEL) >> 1;
 		priv->dma_cap.half_duplex = (hw_cap & DMA_HW_FEAT_HDSEL) >> 2;
-- 
1.7.4.4

^ permalink raw reply related

* Re: [PATCH 2/3] stmmac: fix NULL pointer dereference in capabilities fixup
From: Giuseppe CAVALLARO @ 2011-10-27  5:36 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: netdev, keguang.zhang, Angus Clark
In-Reply-To: <1319668008.6759.2.camel@deadeye>

On 10/27/2011 12:26 AM, Ben Hutchings wrote:
> On Wed, 2011-10-26 at 15:55 +0200, Giuseppe CAVALLARO wrote:
>> From: Angus Clark <angus.clark@st.com>
>>
>> Signed-off-by: Angus Clark <angus.clark@st.com>
>> Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
>> ---
>>  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    2 +-
>>  1 files changed, 1 insertions(+), 1 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> index fcdd5a2..f77eaa6 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> @@ -325,7 +325,7 @@ static int stmmac_init_phy(struct net_device *dev)
>>  	    (interface == PHY_INTERFACE_MODE_RMII))) {
>>  		phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
>>  				      SUPPORTED_Asym_Pause);
>> -		priv->phydev->advertising = priv->phydev->supported;
>> +		priv->phydev->advertising = phydev->supported;
> 
> How can this fix the bug?  You mean:
> 
> 		phydev->advertising = phydev->supported;

Ben, you are right.
I made a mistake importing the patch from Angus.

I'm fixing and resending it again.

Thanks and Sorry
Peppe

> 
> Ben.
> 
>>  	}
>>  
>>  	/*
> 

^ permalink raw reply

* Re: [PATCH] Add TCP_NO_DELAYED_ACK socket option
From: Andy Lutomirski @ 2011-10-27  5:35 UTC (permalink / raw)
  To: Rick Jones; +Cc: netdev
In-Reply-To: <4EA86837.60003@hp.com>

On Wed, Oct 26, 2011 at 1:06 PM, Rick Jones <rick.jones2@hp.com> wrote:
>>> If the networks where this happens are indeed truly private, can they run
>>> a
>>> private kernel?  Or use an LD_PRELOAD hack to wedge-in a
>>> setsockopt(TCP_NODELAY) call into the application?  Or set something like
>>> tcp_naglim_def on the application system(s)?  Or have the server
>>> application
>>> make a setsockopt(TCP_MAXSEG) call before listen() to a value one byte
>>> below
>>> that of what the application is sending?
>>
>> We control our server.  We don't control the server at the other end.
>> We've tried to get them to do any of the above, but they seem
>> unwilling or unable to do it.  I suspect that they're using various
>> pieces from various third-party vendors that just don't care.
>
> Making the setsockopt(TCP_MAXSEG) would be at your end :)  Presumably based
> on the minimum message size.  That would cause the connection to have an MSS
> == the request size so every request send should take the "is this send plus
> any queued unsent data >= MSS" path.

That's cute.  The messages are variable-size (but they don't vary
much), so doing this would probably be worse for the network than
having them set TCP_NODELAY or having us turn off delayed acks, but we
don't really care about the network, and it might work well.

>
> Another "at your end" possibility would be setting a rather small SO_RCVBUF
> size at your end before calling listen(), in hopes of triggering the window
> update.

That scares me.  If they ever start sending in bursts (it happens on
occasion), then we lose if they would want to exceed an artificially
small window.

>
>>> Is the application actually "virtuous" in sending logically associated
>>> data
>>> in one "send" call, and simply running afoul of Nagle+DelayedACK in
>>> having
>>> multiple distinct requests outstanding at once, or is it actually quite
>>> evil
>>> in that it is sending logically associated data in separate send calls?
>>>
>>
>> The remote application generates messages meant for us, and they
>> appear to send each message in its own segment.  I don't have the
>> source, so I don't know whether they're really using one send call per
>> message or whether they're using MSG_MORE, TCP_CORK, so some other
>> mechanism.  Each message is time-sensitive and should be received as
>> soon as possible afterq its sent (i.e. one-half rtt).  Unfortunately,
>> when they send two messages and we don't ack the first one, the second
>> gets delayed.  Turning off delayed acks helps but does not completely
>> solve the problem.
>
> If it is write,write,read  (multiple sends per logical message) in a packet
> trace you should see a partial request in the first segment, followed by the
> rest of the request  (and perhaps the second through Nth) in the second
> segment.  Or, I suppose your server application would have a receive
> complete with the first part of the first request, getting the second part
> of the request in a subsequent receive call.
>
> If it is multiple requests at a time each sent in one send call, you should
> see a first segment arriving with a complete request within it, followed by
> a second segment with the next request(s).

These are asynchronous messages and we don't reply to the vast
majority of them.  We see one request arriving per segment.

I'll play with TCP_MAXSEG.  But I'll probably leave TCP_NO_DELAYED_ACK
patched in to my kernel for the time being.  I'm not thrilled about
forcing the other side to split their messages across multiple
segments.

--Andy

^ permalink raw reply

* [PATCH] xfrm: fix error checking in xfrm_output_gso
From: Yan, Zheng @ 2011-10-27  5:33 UTC (permalink / raw)
  To: netdev@vger.kernel.org; +Cc: Herbert Xu, davem@davemloft.net

xfrm_output2() returns 1 on success. This bug makes xfrm_output_gso()
drop all segments except the first one.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
---
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 47bacd8..04e963a 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -159,7 +159,7 @@ static int xfrm_output_gso(struct sk_buff *skb)
 		segs->next = NULL;
 		err = xfrm_output2(segs);
 
-		if (unlikely(err)) {
+		if (unlikely(err < 0)) {
 			while ((segs = nskb)) {
 				nskb = segs->next;
 				segs->next = NULL;

^ permalink raw reply related

* Re: [PATCH] ipv6: tcp: fix TCLASS value in ACK messages sent from TIME_WAIT
From: David Miller @ 2011-10-27  5:32 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1319690022.3436.10.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 27 Oct 2011 06:33:42 +0200

> [PATCH] ipv6: tcp: fix TCLASS value in ACK messages sent from TIME_WAIT
> 
> commit 66b13d99d96a (ipv4: tcp: fix TOS value in ACK messages sent from
> TIME_WAIT) fixed IPv4 only.
> 
> This part is for the IPv6 side, adding a tclass param to ip6_xmit()
> 
> We alias tw_tclass and tw_tos, if socket family is INET6.
> 
> [ if sockets is ipv4-mapped, only IP_TOS socket option is used to fill
> TOS field, TCLASS is not taken into account ]
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied, thanks!

^ permalink raw reply

* Re: drivers/net/ethernet/apple
From: Geert Uytterhoeven @ 2011-10-27  5:16 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: Benjamin Herrenschmidt, netdev, linux-kernel
In-Reply-To: <CAMuHMdXxqKoPNNo9a+VK-Yzu0Qp-8zY2+OhHL6pDPF=dsnNK5Q@mail.gmail.com>

Hi Jeff,

On Tue, Oct 25, 2011 at 22:19, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> drivers/net/ethernet/apple/mac89x0.c is a driver for the Crystal Semiconductor
> (Now Cirrus Logic) CS89[02]0, so it belongs in drivers/net/ethernet/cirrus,
> next to cs89x0.c.

And on the first -next run since its inclusion:

| drivers/net/ethernet/apple/mac89x0.c:107:20: error: cs89x0.h: No such file or directory

it needs the (shared) header file which is in drivers/net/ethernet/cirrus/.

http://kisskb.ellerman.id.au/kisskb/buildresult/4835488/

> And according to drivers/net/ethernet/apple/mace.h, "mace" is the
> "Am79C940 MACE (Medium Access Control for Ethernet)", so mace and
> macmace should be in drivers/net/ethernet/amd/.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH] ipv6: tcp: fix TCLASS value in ACK messages sent from TIME_WAIT
From: Eric Dumazet @ 2011-10-27  4:33 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20111026.160159.696019885442921932.davem@davemloft.net>

Le mercredi 26 octobre 2011 à 16:01 -0400, David Miller a écrit :
> From: David Miller <davem@davemloft.net>
> Date: Wed, 26 Oct 2011 15:59:08 -0400 (EDT)
> 
> > From: Eric Dumazet <eric.dumazet@gmail.com>
> > Date: Wed, 26 Oct 2011 08:42:11 +0200
> > 
> >> commit 66b13d99d96a (ipv4: tcp: fix TOS value in ACK messages sent from
> >> TIME_WAIT) fixed IPv4 only.
> >> 
> >> This part is for the IPv6 side.
> >> 
> >> We alias tw_tclass and tw_tos, if socket family is INET6.
> >> 
> >> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> > 
> > Applied.
> 
> Nevermind...  reverted.

Oh well, sorry, here is an updated patch

[PATCH] ipv6: tcp: fix TCLASS value in ACK messages sent from TIME_WAIT

commit 66b13d99d96a (ipv4: tcp: fix TOS value in ACK messages sent from
TIME_WAIT) fixed IPv4 only.

This part is for the IPv6 side, adding a tclass param to ip6_xmit()

We alias tw_tclass and tw_tos, if socket family is INET6.

[ if sockets is ipv4-mapped, only IP_TOS socket option is used to fill
TOS field, TCLASS is not taken into account ]

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
V2: changes dccp/sctp as they use ip6_xmit()
 include/net/inet_timewait_sock.h |    1 +
 include/net/ipv6.h               |    3 ++-
 net/dccp/ipv6.c                  |    4 ++--
 net/ipv4/tcp_minisocks.c         |    1 +
 net/ipv6/inet6_connection_sock.c |    2 +-
 net/ipv6/ip6_output.c            |    7 ++-----
 net/ipv6/tcp_ipv6.c              |   17 +++++++++--------
 net/sctp/ipv6.c                  |    2 +-
 8 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 180231c..f91a1fb 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -134,6 +134,7 @@ struct inet_timewait_sock {
 	struct inet_bind_bucket	*tw_tb;
 	struct hlist_node	tw_death_node;
 };
+#define tw_tclass tw_tos
 
 static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
 				      struct hlist_nulls_head *list)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 3b5ac1f..a366a8a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -486,7 +486,8 @@ extern int			ip6_rcv_finish(struct sk_buff *skb);
 extern int			ip6_xmit(struct sock *sk,
 					 struct sk_buff *skb,
 					 struct flowi6 *fl6,
-					 struct ipv6_txoptions *opt);
+					 struct ipv6_txoptions *opt,
+					 int tclass);
 
 extern int			ip6_nd_hdr(struct sock *sk,
 					   struct sk_buff *skb,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index b74f761..17ee85c 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -271,7 +271,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 							 &ireq6->loc_addr,
 							 &ireq6->rmt_addr);
 		ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
-		err = ip6_xmit(sk, skb, &fl6, opt);
+		err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
 		err = net_xmit_eval(err);
 	}
 
@@ -326,7 +326,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
 	if (!IS_ERR(dst)) {
 		skb_dst_set(skb, dst);
-		ip6_xmit(ctl_sk, skb, &fl6, NULL);
+		ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
 		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
 		DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
 		return;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 85a2fbe..66363b6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -345,6 +345,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 			tw6 = inet6_twsk((struct sock *)tw);
 			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
 			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw->tw_tclass = np->tclass;
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 2916200..fee46d5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -248,7 +248,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 	/* Restore final destination back after routing done */
 	ipv6_addr_copy(&fl6.daddr, &np->daddr);
 
-	res = ip6_xmit(sk, skb, &fl6, np->opt);
+	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 	rcu_read_unlock();
 	return res;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1c9bf8b..ff30047 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -180,7 +180,7 @@ int ip6_output(struct sk_buff *skb)
  */
 
 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
-	     struct ipv6_txoptions *opt)
+	     struct ipv6_txoptions *opt, int tclass)
 {
 	struct net *net = sock_net(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -190,7 +190,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	u8  proto = fl6->flowi6_proto;
 	int seg_len = skb->len;
 	int hlimit = -1;
-	int tclass = 0;
 	u32 mtu;
 
 	if (opt) {
@@ -228,10 +227,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	/*
 	 *	Fill in the IPv6 header
 	 */
-	if (np) {
-		tclass = np->tclass;
+	if (np)
 		hlimit = np->hop_limit;
-	}
 	if (hlimit < 0)
 		hlimit = ip6_dst_hoplimit(dst);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c8683fc..10b2b31 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -513,7 +513,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
 
 		ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
-		err = ip6_xmit(sk, skb, &fl6, opt);
+		err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
 		err = net_xmit_eval(err);
 	}
 
@@ -979,7 +979,7 @@ static int tcp6_gro_complete(struct sk_buff *skb)
 }
 
 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
-				 u32 ts, struct tcp_md5sig_key *key, int rst)
+				 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcphdr *t1;
@@ -1060,7 +1060,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
 	if (!IS_ERR(dst)) {
 		skb_dst_set(buff, dst);
-		ip6_xmit(ctl_sk, buff, &fl6, NULL);
+		ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
 		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 		if (rst)
 			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -1093,13 +1093,13 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
 			  (th->doff << 2);
 
-	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1);
+	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
 }
 
 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
-			    struct tcp_md5sig_key *key)
+			    struct tcp_md5sig_key *key, u8 tclass)
 {
-	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0);
+	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
 }
 
 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1109,7 +1109,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 
 	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));
+			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
+			tw->tw_tclass);
 
 	inet_twsk_put(tw);
 }
@@ -1118,7 +1119,7 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req)
 {
 	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
-			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
+			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
 }
 
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index aabaee4..8104278 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -243,7 +243,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
 		skb->local_df = 1;
 
-	return ip6_xmit(sk, skb, &fl6, np->opt);
+	return ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 }
 
 /* Returns the dst cache entry for the given source and destination ip

^ permalink raw reply related

* Re: [PATCH] ipv6: tcp: fix TCLASS value in ACK messages sent from TIME_WAIT
From: Eric Dumazet @ 2011-10-27  4:30 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20111026.155908.1133603039272144196.davem@davemloft.net>

Le mercredi 26 octobre 2011 à 15:59 -0400, David Miller a écrit :

> Did you happen to check to see that this does the right thing for
> V4 mapped ipv6 sockets?
> 

This is handled by the prior patch, and this new patch doesn't break it

(tw->tw_tclass is overwritten by tclass, only on true/AF_INET6 ipv6
sockets)

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox