* [RFC PATCH net-next 0/2] Add new switchdev device class
@ 2015-08-27 7:16 sfeldma
2015-08-27 7:16 ` [RFC PATCH net-next 1/2] switchdev: create " sfeldma
` (7 more replies)
0 siblings, 8 replies; 22+ messages in thread
From: sfeldma @ 2015-08-27 7:16 UTC (permalink / raw)
To: netdev; +Cc: jiri, davem, f.fainelli, roopa
From: Scott Feldman <sfeldma@gmail.com>
In the switchdev model, we use netdevs to represent switchdev ports, but we
have no representation for the switch itself. So, introduce a new switchdev
device class so we can define semantics and programming interfaces for the
switch itself. Switchdev device class isn't tied to any particular bus.
This patch set is just the skeleton to get us started. It adds the sysfs
object registration for the new class and defines a class-level attr "foo".
With the new class, we could hook PM functions, for example, to handle power
transitions at the switch level. I registered rocker and get:
$ ls /sys/class/switchdev/5254001235010000/
foo power subsystem uevent
So what next? I'd rather not build APIs around sysfs, so we need a netlink API
we can build on top of this. It's not really rtnl. Maybe genl would work?
What ever it is, we'd need to teach iproute2 about a new 'switch' command.
Netlink API would allow us to represent switch-wide objects such as registers,
tables, stats, firmware, and maybe even control. I think with with netlink
TLVs, we can create a framework for these objects but still allow the switch
driver provide switch-specific info. For example, a table object:
[TABLES]
[TABLE]
[FIELDS]
[FIELD]
[ID, TYPE]
[DATA]
[ID, VALUE]
Maybe iproute2 has pretty-printers for specific switches like ethtool has for
reg dumps.
I don't know about how this overlaps with DSA platform_class. Florian?
Comments?
Scott Feldman (2):
switchdev: create new switchdev device class
rocker: register each switch as a switchdev
drivers/net/ethernet/rocker/rocker.c | 23 ++++++++++
include/net/switchdev.h | 16 +++++++
net/switchdev/switchdev.c | 76 ++++++++++++++++++++++++++++++++++
3 files changed, 115 insertions(+)
--
1.7.10.4
^ permalink raw reply [flat|nested] 22+ messages in thread
* [RFC PATCH net-next 1/2] switchdev: create new switchdev device class
2015-08-27 7:16 [RFC PATCH net-next 0/2] Add new switchdev device class sfeldma
@ 2015-08-27 7:16 ` sfeldma
2015-08-27 7:16 ` [RFC PATCH net-next 2/2] rocker: register each switch as a switchdev sfeldma
` (6 subsequent siblings)
7 siblings, 0 replies; 22+ messages in thread
From: sfeldma @ 2015-08-27 7:16 UTC (permalink / raw)
To: netdev; +Cc: jiri, davem, f.fainelli, roopa
From: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
---
include/net/switchdev.h | 16 ++++++++++
net/switchdev/switchdev.c | 76 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 92 insertions(+)
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 319baab..d61e73c 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -16,6 +16,11 @@
#define SWITCHDEV_F_NO_RECURSE BIT(0)
+struct switchdev {
+ struct device dev;
+ atomic_t foo;
+};
+
enum switchdev_trans {
SWITCHDEV_TRANS_NONE,
SWITCHDEV_TRANS_PREPARE,
@@ -126,6 +131,8 @@ switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
#ifdef CONFIG_NET_SWITCHDEV
+int register_switchdev(struct switchdev *sdev, const char *name);
+void unregister_switchdev(struct switchdev *sdev);
int switchdev_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr);
int switchdev_port_attr_set(struct net_device *dev,
@@ -164,6 +171,15 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
#else
+static inline int register_switchdev(struct switchdev *sdev, const char *name)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void unregister_switchdev(struct switchdev *sdev)
+{
+}
+
static inline int switchdev_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr)
{
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 16c1c43..f705202 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -10,6 +10,7 @@
*/
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
@@ -19,6 +20,63 @@
#include <net/ip_fib.h>
#include <net/switchdev.h>
+#define to_switchdev(d) container_of(d, struct switchdev, dev)
+
+static void switchdev_release(struct device *dev)
+{
+}
+
+static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct switchdev *switchdev = to_switchdev(dev);
+
+ return sprintf(buf, "%d\n", atomic_read(&switchdev->foo));
+}
+
+static DEVICE_ATTR_RO(foo);
+
+static struct attribute *switchdev_attrs[] = {
+ &dev_attr_foo.attr
+};
+
+ATTRIBUTE_GROUPS(switchdev);
+
+static int switchdev_uevent(struct device *d, struct kobj_uevent_env *env)
+{
+ return 0;
+}
+
+static struct class switchdev_class = {
+ .name = "switchdev",
+ .dev_release = switchdev_release,
+ .dev_groups = switchdev_groups,
+ .dev_uevent = switchdev_uevent,
+};
+
+int register_switchdev(struct switchdev *sdev, const char *name)
+{
+ struct device *dev = &sdev->dev;
+ int err;
+
+ device_initialize(dev);
+
+ dev->class = &switchdev_class;
+
+ err = dev_set_name(dev, "%s", name);
+ if (err)
+ return err;
+
+ return device_add(dev);
+}
+EXPORT_SYMBOL_GPL(register_switchdev);
+
+void unregister_switchdev(struct switchdev *sdev)
+{
+ put_device(&sdev->dev);
+}
+EXPORT_SYMBOL_GPL(unregister_switchdev);
+
/**
* switchdev_port_attr_get - Get port attribute
*
@@ -1142,3 +1200,21 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
dev->offload_fwd_mark = mark;
}
EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
+
+static int __init switchdev_module_init(void)
+{
+ return class_register(&switchdev_class);
+}
+
+static void __exit switchdev_module_exit(void)
+{
+ class_unregister(&switchdev_class);
+}
+
+module_init(switchdev_module_init);
+module_exit(switchdev_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_AUTHOR("Scott Feldman <sfeldma@gmail.com>");
+MODULE_DESCRIPTION("Ethernet switch device model");
--
1.7.10.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [RFC PATCH net-next 2/2] rocker: register each switch as a switchdev
2015-08-27 7:16 [RFC PATCH net-next 0/2] Add new switchdev device class sfeldma
2015-08-27 7:16 ` [RFC PATCH net-next 1/2] switchdev: create " sfeldma
@ 2015-08-27 7:16 ` sfeldma
2015-08-27 7:27 ` [RFC PATCH net-next 0/2] Add new switchdev device class Jiri Pirko
` (5 subsequent siblings)
7 siblings, 0 replies; 22+ messages in thread
From: sfeldma @ 2015-08-27 7:16 UTC (permalink / raw)
To: netdev; +Cc: jiri, davem, f.fainelli, roopa
From: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
---
drivers/net/ethernet/rocker/rocker.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index a7cb74a..9555ae4 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -233,6 +233,7 @@ struct rocker {
struct pci_dev *pdev;
u8 __iomem *hw_addr;
struct msix_entry *msix_entries;
+ struct switchdev switchdev;
unsigned int port_count;
struct rocker_port **ports;
struct {
@@ -5090,6 +5091,19 @@ static void rocker_msix_fini(const struct rocker *rocker)
kfree(rocker->msix_entries);
}
+static int rocker_probe_register_switchdev(struct rocker *rocker)
+{
+ char name[sizeof(rocker->hw.id) * 2 + 1];
+
+ sprintf(name, "%*phN", (int)sizeof(rocker->hw.id), &rocker->hw.id);
+ return register_switchdev(&rocker->switchdev, name);
+}
+
+static void rocker_probe_unregister_switchdev(struct rocker *rocker)
+{
+ unregister_switchdev(&rocker->switchdev);
+}
+
static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct rocker *rocker;
@@ -5194,11 +5208,19 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_probe_ports;
}
+ err = rocker_probe_register_switchdev(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "cannot register switchdev\n");
+ goto err_register_switchdev;
+ }
+
dev_info(&pdev->dev, "Rocker switch with id %*phN\n",
(int)sizeof(rocker->hw.id), &rocker->hw.id);
return 0;
+err_register_switchdev:
+ rocker_remove_ports(rocker);
err_probe_ports:
rocker_free_tbls(rocker);
err_init_tbls:
@@ -5227,6 +5249,7 @@ static void rocker_remove(struct pci_dev *pdev)
{
struct rocker *rocker = pci_get_drvdata(pdev);
+ rocker_probe_unregister_switchdev(rocker);
rocker_free_tbls(rocker);
rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
rocker_remove_ports(rocker);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:16 [RFC PATCH net-next 0/2] Add new switchdev device class sfeldma
2015-08-27 7:16 ` [RFC PATCH net-next 1/2] switchdev: create " sfeldma
2015-08-27 7:16 ` [RFC PATCH net-next 2/2] rocker: register each switch as a switchdev sfeldma
@ 2015-08-27 7:27 ` Jiri Pirko
2015-08-27 7:43 ` John Fastabend
2015-08-27 8:17 ` Scott Feldman
2015-08-27 7:36 ` John Fastabend
` (4 subsequent siblings)
7 siblings, 2 replies; 22+ messages in thread
From: Jiri Pirko @ 2015-08-27 7:27 UTC (permalink / raw)
To: sfeldma; +Cc: netdev, davem, f.fainelli, roopa
Thu, Aug 27, 2015 at 09:16:44AM CEST, sfeldma@gmail.com wrote:
>From: Scott Feldman <sfeldma@gmail.com>
>
>In the switchdev model, we use netdevs to represent switchdev ports, but we
>have no representation for the switch itself. So, introduce a new switchdev
>device class so we can define semantics and programming interfaces for the
>switch itself. Switchdev device class isn't tied to any particular bus.
>
>This patch set is just the skeleton to get us started. It adds the sysfs
>object registration for the new class and defines a class-level attr "foo".
>With the new class, we could hook PM functions, for example, to handle power
>transitions at the switch level. I registered rocker and get:
>
> $ ls /sys/class/switchdev/5254001235010000/
> foo power subsystem uevent
No, please avoid adding anything to sysfs. If we need to add anything,
lets make is accesible using Netlink only.
>
>So what next? I'd rather not build APIs around sysfs, so we need a netlink API
>we can build on top of this. It's not really rtnl. Maybe genl would work?
>What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>
>Netlink API would allow us to represent switch-wide objects such as registers,
>tables, stats, firmware, and maybe even control. I think with with netlink
>TLVs, we can create a framework for these objects but still allow the switch
>driver provide switch-specific info. For example, a table object:
>
>[TABLES]
> [TABLE]
> [FIELDS]
> [FIELD]
> [ID, TYPE]
> [DATA]
> [ID, VALUE]
Alert! I feel that someone would like to abuse this iface for writing
configuration through. This should be read-only by design. I also think
that this should not be something switch-specific. I believe that NIC
drivers would benefit from this iface as well when they want to expose
something. I think we should use genl for this.
>
>Maybe iproute2 has pretty-printers for specific switches like ethtool has for
>reg dumps.
I feel like a lot of what you described overlaps with existing
interfaces and tools. Why don't we just reuse that? For firmware for
example, just take one of the ports. Same for stats (I plan to expose my
mlxsw switch-wide stats in ethtool so they are accessible through every
port netdevice).
I still do not see the need for new device class. I have strong feeling
that it should be avoided.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:16 [RFC PATCH net-next 0/2] Add new switchdev device class sfeldma
` (2 preceding siblings ...)
2015-08-27 7:27 ` [RFC PATCH net-next 0/2] Add new switchdev device class Jiri Pirko
@ 2015-08-27 7:36 ` John Fastabend
2015-08-27 7:44 ` Jiri Pirko
2015-08-27 8:23 ` Scott Feldman
2015-08-27 7:45 ` Andrew Lunn
` (3 subsequent siblings)
7 siblings, 2 replies; 22+ messages in thread
From: John Fastabend @ 2015-08-27 7:36 UTC (permalink / raw)
To: sfeldma, netdev; +Cc: jiri, davem, f.fainelli, roopa
On 15-08-27 12:16 AM, sfeldma@gmail.com wrote:
> From: Scott Feldman <sfeldma@gmail.com>
>
> In the switchdev model, we use netdevs to represent switchdev ports, but we
> have no representation for the switch itself. So, introduce a new switchdev
> device class so we can define semantics and programming interfaces for the
> switch itself. Switchdev device class isn't tied to any particular bus.
>
> This patch set is just the skeleton to get us started. It adds the sysfs
> object registration for the new class and defines a class-level attr "foo".
> With the new class, we could hook PM functions, for example, to handle power
> transitions at the switch level. I registered rocker and get:
>
> $ ls /sys/class/switchdev/5254001235010000/
> foo power subsystem uevent
>
> So what next? I'd rather not build APIs around sysfs, so we need a netlink API
> we can build on top of this. It's not really rtnl. Maybe genl would work?
> What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>
> Netlink API would allow us to represent switch-wide objects such as registers,
> tables, stats, firmware, and maybe even control. I think with with netlink
> TLVs, we can create a framework for these objects but still allow the switch
> driver provide switch-specific info. For example, a table object:
>
Hi Scott,
I was going to take the flow-api presented in Feb and submitted
to netdev and get it up to date. I started doing this today should be
ready end of week.
> [TABLES]
> [TABLE]
> [FIELDS]
> [FIELD]
> [ID, TYPE]
> [DATA]
> [ID, VALUE]
>
The structure I used previously which looks like your prototype I think,
(https://github.com/jrfastab/rocker-net-next/blob/master/include/uapi/linux/if_flow.h)
* [NFL_TABLE_IDENTIFIER_TYPE]
* [NFL_TABLE_IDENTIFIER]
* [NFL_TABLE_TABLES]
* [NFL_TABLE]
* [NFL_TABLE_ATTR_NAME]
* [NFL_TABLE_ATTR_UID]
* [NFL_TABLE_ATTR_SOURCE]
* [NFL_TABLE_ATTR_APPLY]
* [NFL_TABLE_ATTR_SIZE]
* [NFL_TABLE_ATTR_MATCHES]
* [NFL_FIELD_REF]
* [NFL_FIELD_REF_INSTANCE]
* [NFL_FIELD_REF_HEADER]
* [NFL_FIELD_REF_FIELD]
* [NFL_FIELD_REF_MASK]
* [NFL_FIELD_REF_TYPE]
* [...]
* [NFL_TABLE_ATTR_ACTIONS]
* [NFL_ACTION_ATTR_UID]
* [...]
* [NFL_TABLE]
* [...]
*
This is well-typed per Dave's comment. And because its Netlink based it
can be easily extended as needed. The above gives basic information on
the table. Sure it could stand to have some other entries in it but I
never needed them for my capabilities/resource discovery. We could argue
about removing some if they are too specific to my use cases.
> Maybe iproute2 has pretty-printers for specific switches like ethtool has for
> reg dumps.
>
> I don't know about how this overlaps with DSA platform_class. Florian?
>
> Comments?
A few other interesting (at least to me) structures you can find in the
if_flow.h header file give how the tables in the hardware are put
together. I found this really useful when trying to sort out where my
various ACLs/nexthop entries/etc were in the hardware order. This is
important to know for many cases. This is NFL_TABLE_GRAPH.
Also I found being able to "know' what headers the hardware supports
to be very useful. For example I want/need to know if QinQ will work.
Or VXLAN/NSH, Geneve, etc. This is NFL_HEADER_GRAPH.
Sure if_flow.h is a poor name and FlowAPI is probably not really great.
But both those names come from how I model the hardware as match action
tables. I can change those names to switchdev_resources or
switchdev_info if folks want.
My point is the interface is generic and provides a common interface for
most hardware I've seen to date. Certainly curious if there is hardware
it doesn't map to. Although tables and pipelines seems fairly universal
for a large set of devices to me. Further it can be extended as needed.
I'll drop the set_rule/del_rule parts to be merged with ebpf/tc/qdisc.
.John
>
>
> Scott Feldman (2):
> switchdev: create new switchdev device class
> rocker: register each switch as a switchdev
>
> drivers/net/ethernet/rocker/rocker.c | 23 ++++++++++
> include/net/switchdev.h | 16 +++++++
> net/switchdev/switchdev.c | 76 ++++++++++++++++++++++++++++++++++
> 3 files changed, 115 insertions(+)
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:27 ` [RFC PATCH net-next 0/2] Add new switchdev device class Jiri Pirko
@ 2015-08-27 7:43 ` John Fastabend
2015-08-27 7:51 ` Jiri Pirko
2015-08-27 8:17 ` Scott Feldman
1 sibling, 1 reply; 22+ messages in thread
From: John Fastabend @ 2015-08-27 7:43 UTC (permalink / raw)
To: Jiri Pirko, sfeldma; +Cc: netdev, davem, f.fainelli, roopa
On 15-08-27 12:27 AM, Jiri Pirko wrote:
> Thu, Aug 27, 2015 at 09:16:44AM CEST, sfeldma@gmail.com wrote:
>> From: Scott Feldman <sfeldma@gmail.com>
>>
>> In the switchdev model, we use netdevs to represent switchdev ports, but we
>> have no representation for the switch itself. So, introduce a new switchdev
>> device class so we can define semantics and programming interfaces for the
>> switch itself. Switchdev device class isn't tied to any particular bus.
>>
>> This patch set is just the skeleton to get us started. It adds the sysfs
>> object registration for the new class and defines a class-level attr "foo".
>> With the new class, we could hook PM functions, for example, to handle power
>> transitions at the switch level. I registered rocker and get:
>>
>> $ ls /sys/class/switchdev/5254001235010000/
>> foo power subsystem uevent
>
> No, please avoid adding anything to sysfs. If we need to add anything,
> lets make is accesible using Netlink only.
>
>
>>
>> So what next? I'd rather not build APIs around sysfs, so we need a netlink API
>> we can build on top of this. It's not really rtnl. Maybe genl would work?
>> What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>>
>> Netlink API would allow us to represent switch-wide objects such as registers,
>> tables, stats, firmware, and maybe even control. I think with with netlink
>> TLVs, we can create a framework for these objects but still allow the switch
>> driver provide switch-specific info. For example, a table object:
>>
>> [TABLES]
>> [TABLE]
>> [FIELDS]
>> [FIELD]
>> [ID, TYPE]
>> [DATA]
>> [ID, VALUE]
>
> Alert! I feel that someone would like to abuse this iface for writing
> configuration through. This should be read-only by design. I also think
> that this should not be something switch-specific. I believe that NIC
> drivers would benefit from this iface as well when they want to expose
> something. I think we should use genl for this.
>
One place where read-only may not make sense is when the tables can
be provisioned/configured. Many switches have the ability to be
configured with "profiles". For a simple example some hardware use a
single table that can be divided into an IPv4 and an IPv6 section.
We don't have an interface to do this today. And I don't want to
see this being "shipped" as magic firmware updates. So a well-defined
netlink interface seems the best approach to me.
Of course like any UAPI we should be a bit cautious adding new bits if
we can't remove them.
>
>>
>> Maybe iproute2 has pretty-printers for specific switches like ethtool has for
>> reg dumps.
>
> I feel like a lot of what you described overlaps with existing
> interfaces and tools. Why don't we just reuse that? For firmware for
> example, just take one of the ports. Same for stats (I plan to expose my
> mlxsw switch-wide stats in ethtool so they are accessible through every
> port netdevice).
>
> I still do not see the need for new device class. I have strong feeling
> that it should be avoided.
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:36 ` John Fastabend
@ 2015-08-27 7:44 ` Jiri Pirko
2015-08-27 8:09 ` John Fastabend
2015-08-27 8:23 ` Scott Feldman
1 sibling, 1 reply; 22+ messages in thread
From: Jiri Pirko @ 2015-08-27 7:44 UTC (permalink / raw)
To: John Fastabend; +Cc: sfeldma, netdev, davem, f.fainelli, roopa
Thu, Aug 27, 2015 at 09:36:20AM CEST, john.fastabend@gmail.com wrote:
>On 15-08-27 12:16 AM, sfeldma@gmail.com wrote:
>> From: Scott Feldman <sfeldma@gmail.com>
>>
>> In the switchdev model, we use netdevs to represent switchdev ports, but we
>> have no representation for the switch itself. So, introduce a new switchdev
>> device class so we can define semantics and programming interfaces for the
>> switch itself. Switchdev device class isn't tied to any particular bus.
>>
>> This patch set is just the skeleton to get us started. It adds the sysfs
>> object registration for the new class and defines a class-level attr "foo".
>> With the new class, we could hook PM functions, for example, to handle power
>> transitions at the switch level. I registered rocker and get:
>>
>> $ ls /sys/class/switchdev/5254001235010000/
>> foo power subsystem uevent
>>
>> So what next? I'd rather not build APIs around sysfs, so we need a netlink API
>> we can build on top of this. It's not really rtnl. Maybe genl would work?
>> What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>>
>> Netlink API would allow us to represent switch-wide objects such as registers,
>> tables, stats, firmware, and maybe even control. I think with with netlink
>> TLVs, we can create a framework for these objects but still allow the switch
>> driver provide switch-specific info. For example, a table object:
>>
>
>Hi Scott,
>
>I was going to take the flow-api presented in Feb and submitted
>to netdev and get it up to date. I started doing this today should be
>ready end of week.
>
>> [TABLES]
>> [TABLE]
>> [FIELDS]
>> [FIELD]
>> [ID, TYPE]
>> [DATA]
>> [ID, VALUE]
>>
>
>The structure I used previously which looks like your prototype I think,
>
> (https://github.com/jrfastab/rocker-net-next/blob/master/include/uapi/linux/if_flow.h)
>
> * [NFL_TABLE_IDENTIFIER_TYPE]
> * [NFL_TABLE_IDENTIFIER]
> * [NFL_TABLE_TABLES]
> * [NFL_TABLE]
> * [NFL_TABLE_ATTR_NAME]
> * [NFL_TABLE_ATTR_UID]
> * [NFL_TABLE_ATTR_SOURCE]
> * [NFL_TABLE_ATTR_APPLY]
> * [NFL_TABLE_ATTR_SIZE]
> * [NFL_TABLE_ATTR_MATCHES]
> * [NFL_FIELD_REF]
> * [NFL_FIELD_REF_INSTANCE]
> * [NFL_FIELD_REF_HEADER]
> * [NFL_FIELD_REF_FIELD]
> * [NFL_FIELD_REF_MASK]
> * [NFL_FIELD_REF_TYPE]
> * [...]
> * [NFL_TABLE_ATTR_ACTIONS]
> * [NFL_ACTION_ATTR_UID]
> * [...]
> * [NFL_TABLE]
> * [...]
> *
>
>This is well-typed per Dave's comment. And because its Netlink based it
>can be easily extended as needed. The above gives basic information on
>the table. Sure it could stand to have some other entries in it but I
>never needed them for my capabilities/resource discovery. We could argue
>about removing some if they are too specific to my use cases.
I guess you are talking about read-only interface right?
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:16 [RFC PATCH net-next 0/2] Add new switchdev device class sfeldma
` (3 preceding siblings ...)
2015-08-27 7:36 ` John Fastabend
@ 2015-08-27 7:45 ` Andrew Lunn
2015-08-27 8:42 ` Scott Feldman
2015-08-27 13:54 ` John W. Linville
` (2 subsequent siblings)
7 siblings, 1 reply; 22+ messages in thread
From: Andrew Lunn @ 2015-08-27 7:45 UTC (permalink / raw)
To: sfeldma; +Cc: netdev, jiri, davem, f.fainelli, roopa
> I don't know about how this overlaps with DSA platform_class. Florian?
There is some overlap with DSA, but the current DSA model, with
respect to probing, is broken. So this might be interesting as a way
towards fix that.
One thing to keep in mind is the D in DSA. You talk about switch,
singular. DSA has a number of switches in a cluster. We currently
export a single switchdev interface for the cluster, but there are
some properties which are per switch, e.g. temperature, eeprom
contents, statistics, power management etc.
Although ethtool does have options for these, it is not always a
natural fit. ethtool --eeprom-dump on a switch port dumps the switch
EEPROM, and all ports on the switch can be used. --register-dump on a
port is good for showing the per ports registers, but there is no
natural interface for showing the global switch registers. Florian's
resent L2 interface patch shows we have issues getting access to the
'cpu' port, the port which interfaces to the host.
We need to be careful that any new interfaces we add are better at
representing the true structure of the hardware, which includes there
being multiple physical switches below a switchdev, and they are
connected together by ports which are currently not visible as
netdevs.
Andrew
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:43 ` John Fastabend
@ 2015-08-27 7:51 ` Jiri Pirko
2015-08-27 8:14 ` John Fastabend
0 siblings, 1 reply; 22+ messages in thread
From: Jiri Pirko @ 2015-08-27 7:51 UTC (permalink / raw)
To: John Fastabend; +Cc: sfeldma, netdev, davem, f.fainelli, roopa
Thu, Aug 27, 2015 at 09:43:54AM CEST, john.fastabend@gmail.com wrote:
>On 15-08-27 12:27 AM, Jiri Pirko wrote:
>> Thu, Aug 27, 2015 at 09:16:44AM CEST, sfeldma@gmail.com wrote:
>>> From: Scott Feldman <sfeldma@gmail.com>
>>>
>>> In the switchdev model, we use netdevs to represent switchdev ports, but we
>>> have no representation for the switch itself. So, introduce a new switchdev
>>> device class so we can define semantics and programming interfaces for the
>>> switch itself. Switchdev device class isn't tied to any particular bus.
>>>
>>> This patch set is just the skeleton to get us started. It adds the sysfs
>>> object registration for the new class and defines a class-level attr "foo".
>>> With the new class, we could hook PM functions, for example, to handle power
>>> transitions at the switch level. I registered rocker and get:
>>>
>>> $ ls /sys/class/switchdev/5254001235010000/
>>> foo power subsystem uevent
>>
>> No, please avoid adding anything to sysfs. If we need to add anything,
>> lets make is accesible using Netlink only.
>>
>>
>>>
>>> So what next? I'd rather not build APIs around sysfs, so we need a netlink API
>>> we can build on top of this. It's not really rtnl. Maybe genl would work?
>>> What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>>>
>>> Netlink API would allow us to represent switch-wide objects such as registers,
>>> tables, stats, firmware, and maybe even control. I think with with netlink
>>> TLVs, we can create a framework for these objects but still allow the switch
>>> driver provide switch-specific info. For example, a table object:
>>>
>>> [TABLES]
>>> [TABLE]
>>> [FIELDS]
>>> [FIELD]
>>> [ID, TYPE]
>>> [DATA]
>>> [ID, VALUE]
>>
>> Alert! I feel that someone would like to abuse this iface for writing
>> configuration through. This should be read-only by design. I also think
>> that this should not be something switch-specific. I believe that NIC
>> drivers would benefit from this iface as well when they want to expose
>> something. I think we should use genl for this.
>>
>
>One place where read-only may not make sense is when the tables can
>be provisioned/configured. Many switches have the ability to be
>configured with "profiles". For a simple example some hardware use a
>single table that can be divided into an IPv4 and an IPv6 section.
Okay. That should be configured via separate configuration Netlink
interface - ConfNetlink. I already spoke with Dave about need for
that one: Netlink based, use PCI-addr (other addr) as a handle,
well-defined config objects. The need to this interface is bigger and bigger.
I can cook-up some RFC patch so you see what I'm talking about.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:44 ` Jiri Pirko
@ 2015-08-27 8:09 ` John Fastabend
0 siblings, 0 replies; 22+ messages in thread
From: John Fastabend @ 2015-08-27 8:09 UTC (permalink / raw)
To: Jiri Pirko; +Cc: sfeldma, netdev, davem, f.fainelli, roopa
[...]
>>
>> The structure I used previously which looks like your prototype I think,
>>
>> (https://github.com/jrfastab/rocker-net-next/blob/master/include/uapi/linux/if_flow.h)
>>
>> * [NFL_TABLE_IDENTIFIER_TYPE]
>> * [NFL_TABLE_IDENTIFIER]
>> * [NFL_TABLE_TABLES]
>> * [NFL_TABLE]
>> * [NFL_TABLE_ATTR_NAME]
>> * [NFL_TABLE_ATTR_UID]
>> * [NFL_TABLE_ATTR_SOURCE]
>> * [NFL_TABLE_ATTR_APPLY]
>> * [NFL_TABLE_ATTR_SIZE]
>> * [NFL_TABLE_ATTR_MATCHES]
>> * [NFL_FIELD_REF]
>> * [NFL_FIELD_REF_INSTANCE]
>> * [NFL_FIELD_REF_HEADER]
>> * [NFL_FIELD_REF_FIELD]
>> * [NFL_FIELD_REF_MASK]
>> * [NFL_FIELD_REF_TYPE]
>> * [...]
>> * [NFL_TABLE_ATTR_ACTIONS]
>> * [NFL_ACTION_ATTR_UID]
>> * [...]
>> * [NFL_TABLE]
>> * [...]
>> *
>>
>> This is well-typed per Dave's comment. And because its Netlink based it
>> can be easily extended as needed. The above gives basic information on
>> the table. Sure it could stand to have some other entries in it but I
>> never needed them for my capabilities/resource discovery. We could argue
>> about removing some if they are too specific to my use cases.
>
> I guess you are talking about read-only interface right?
>
Correct. read-only.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:51 ` Jiri Pirko
@ 2015-08-27 8:14 ` John Fastabend
0 siblings, 0 replies; 22+ messages in thread
From: John Fastabend @ 2015-08-27 8:14 UTC (permalink / raw)
To: Jiri Pirko; +Cc: sfeldma, netdev, davem, f.fainelli, roopa
On 15-08-27 12:51 AM, Jiri Pirko wrote:
> Thu, Aug 27, 2015 at 09:43:54AM CEST, john.fastabend@gmail.com wrote:
>> On 15-08-27 12:27 AM, Jiri Pirko wrote:
>>> Thu, Aug 27, 2015 at 09:16:44AM CEST, sfeldma@gmail.com wrote:
>>>> From: Scott Feldman <sfeldma@gmail.com>
>>>>
>>>> In the switchdev model, we use netdevs to represent switchdev ports, but we
>>>> have no representation for the switch itself. So, introduce a new switchdev
>>>> device class so we can define semantics and programming interfaces for the
>>>> switch itself. Switchdev device class isn't tied to any particular bus.
>>>>
>>>> This patch set is just the skeleton to get us started. It adds the sysfs
>>>> object registration for the new class and defines a class-level attr "foo".
>>>> With the new class, we could hook PM functions, for example, to handle power
>>>> transitions at the switch level. I registered rocker and get:
>>>>
>>>> $ ls /sys/class/switchdev/5254001235010000/
>>>> foo power subsystem uevent
>>>
>>> No, please avoid adding anything to sysfs. If we need to add anything,
>>> lets make is accesible using Netlink only.
>>>
>>>
>>>>
>>>> So what next? I'd rather not build APIs around sysfs, so we need a netlink API
>>>> we can build on top of this. It's not really rtnl. Maybe genl would work?
>>>> What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>>>>
>>>> Netlink API would allow us to represent switch-wide objects such as registers,
>>>> tables, stats, firmware, and maybe even control. I think with with netlink
>>>> TLVs, we can create a framework for these objects but still allow the switch
>>>> driver provide switch-specific info. For example, a table object:
>>>>
>>>> [TABLES]
>>>> [TABLE]
>>>> [FIELDS]
>>>> [FIELD]
>>>> [ID, TYPE]
>>>> [DATA]
>>>> [ID, VALUE]
>>>
>>> Alert! I feel that someone would like to abuse this iface for writing
>>> configuration through. This should be read-only by design. I also think
>>> that this should not be something switch-specific. I believe that NIC
>>> drivers would benefit from this iface as well when they want to expose
>>> something. I think we should use genl for this.
>>>
>>
>> One place where read-only may not make sense is when the tables can
>> be provisioned/configured. Many switches have the ability to be
>> configured with "profiles". For a simple example some hardware use a
>> single table that can be divided into an IPv4 and an IPv6 section.
>
> Okay. That should be configured via separate configuration Netlink
> interface - ConfNetlink. I already spoke with Dave about need for
> that one: Netlink based, use PCI-addr (other addr) as a handle,
> well-defined config objects. The need to this interface is bigger and bigger.
>
> I can cook-up some RFC patch so you see what I'm talking about.
>
Great. I originally buried it in the above API but maybe its best
to keep them separate. I'll take a look at your RFC patches when
they hit the list.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:27 ` [RFC PATCH net-next 0/2] Add new switchdev device class Jiri Pirko
2015-08-27 7:43 ` John Fastabend
@ 2015-08-27 8:17 ` Scott Feldman
2015-08-27 8:41 ` Jiri Pirko
1 sibling, 1 reply; 22+ messages in thread
From: Scott Feldman @ 2015-08-27 8:17 UTC (permalink / raw)
To: Jiri Pirko; +Cc: Netdev, David S. Miller, Florian Fainelli, Roopa Prabhu
On Thu, Aug 27, 2015 at 12:27 AM, Jiri Pirko <jiri@resnulli.us> wrote:
> Thu, Aug 27, 2015 at 09:16:44AM CEST, sfeldma@gmail.com wrote:
>>From: Scott Feldman <sfeldma@gmail.com>
>>
>>In the switchdev model, we use netdevs to represent switchdev ports, but we
>>have no representation for the switch itself. So, introduce a new switchdev
>>device class so we can define semantics and programming interfaces for the
>>switch itself. Switchdev device class isn't tied to any particular bus.
>>
>>This patch set is just the skeleton to get us started. It adds the sysfs
>>object registration for the new class and defines a class-level attr "foo".
>>With the new class, we could hook PM functions, for example, to handle power
>>transitions at the switch level. I registered rocker and get:
>>
>> $ ls /sys/class/switchdev/5254001235010000/
>> foo power subsystem uevent
>
> No, please avoid adding anything to sysfs. If we need to add anything,
> lets make is accesible using Netlink only.
I see no harm in using the device model to define and new device class
which just so happens to show up in sysfs. What sysfs attrs get
exposed is where we can have some discussion/rules.
>>So what next? I'd rather not build APIs around sysfs, so we need a netlink API
>>we can build on top of this. It's not really rtnl. Maybe genl would work?
>>What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>>
>>Netlink API would allow us to represent switch-wide objects such as registers,
>>tables, stats, firmware, and maybe even control. I think with with netlink
>>TLVs, we can create a framework for these objects but still allow the switch
>>driver provide switch-specific info. For example, a table object:
>>
>>[TABLES]
>> [TABLE]
>> [FIELDS]
>> [FIELD]
>> [ID, TYPE]
>> [DATA]
>> [ID, VALUE]
>
> Alert! I feel that someone would like to abuse this iface for writing
> configuration through. This should be read-only by design. I also think
> that this should not be something switch-specific. I believe that NIC
> drivers would benefit from this iface as well when they want to expose
> something. I think we should use genl for this.
Read-only is fine. Look, I'm just trying to dump rocker internal
tables in some format I can grep outside the kernel. The tables are
get big and complicated fast and printk doesn't cut it. I can use
degugfs privately, but I need to be able to dump same for field
troubleshooting. I can't use debugfs, so I want some kind of
XML-like dump facility. It's going to have device-specific data, so
I'm looking for an XML-like way to represent this data in netlink.
>>
>>Maybe iproute2 has pretty-printers for specific switches like ethtool has for
>>reg dumps.
>
> I feel like a lot of what you described overlaps with existing
> interfaces and tools. Why don't we just reuse that? For firmware for
> example, just take one of the ports. Same for stats (I plan to expose my
> mlxsw switch-wide stats in ethtool so they are accessible through every
> port netdevice).
Port-centric stats are fine for port netdevs, but I'd like switch-wide
stats to show up elsewhere.
Thinking ahead: I'd like to put port into namespaces and I don't want
to dump stats on a port and see stats on ports in other namespaces.
> I still do not see the need for new device class. I have strong feeling
> that it should be avoided.
Ok
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:36 ` John Fastabend
2015-08-27 7:44 ` Jiri Pirko
@ 2015-08-27 8:23 ` Scott Feldman
1 sibling, 0 replies; 22+ messages in thread
From: Scott Feldman @ 2015-08-27 8:23 UTC (permalink / raw)
To: John Fastabend
Cc: Netdev, Jiří Pírko, David S. Miller,
Florian Fainelli, Roopa Prabhu
On Thu, Aug 27, 2015 at 12:36 AM, John Fastabend
<john.fastabend@gmail.com> wrote:
> On 15-08-27 12:16 AM, sfeldma@gmail.com wrote:
>> From: Scott Feldman <sfeldma@gmail.com>
>>
>> In the switchdev model, we use netdevs to represent switchdev ports, but we
>> have no representation for the switch itself. So, introduce a new switchdev
>> device class so we can define semantics and programming interfaces for the
>> switch itself. Switchdev device class isn't tied to any particular bus.
>>
>> This patch set is just the skeleton to get us started. It adds the sysfs
>> object registration for the new class and defines a class-level attr "foo".
>> With the new class, we could hook PM functions, for example, to handle power
>> transitions at the switch level. I registered rocker and get:
>>
>> $ ls /sys/class/switchdev/5254001235010000/
>> foo power subsystem uevent
>>
>> So what next? I'd rather not build APIs around sysfs, so we need a netlink API
>> we can build on top of this. It's not really rtnl. Maybe genl would work?
>> What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>>
>> Netlink API would allow us to represent switch-wide objects such as registers,
>> tables, stats, firmware, and maybe even control. I think with with netlink
>> TLVs, we can create a framework for these objects but still allow the switch
>> driver provide switch-specific info. For example, a table object:
>>
>
> Hi Scott,
>
> I was going to take the flow-api presented in Feb and submitted
> to netdev and get it up to date. I started doing this today should be
> ready end of week.
>
>> [TABLES]
>> [TABLE]
>> [FIELDS]
>> [FIELD]
>> [ID, TYPE]
>> [DATA]
>> [ID, VALUE]
>>
>
> The structure I used previously which looks like your prototype I think,
>
> (https://github.com/jrfastab/rocker-net-next/blob/master/include/uapi/linux/if_flow.h)
>
> * [NFL_TABLE_IDENTIFIER_TYPE]
> * [NFL_TABLE_IDENTIFIER]
> * [NFL_TABLE_TABLES]
> * [NFL_TABLE]
> * [NFL_TABLE_ATTR_NAME]
> * [NFL_TABLE_ATTR_UID]
> * [NFL_TABLE_ATTR_SOURCE]
> * [NFL_TABLE_ATTR_APPLY]
> * [NFL_TABLE_ATTR_SIZE]
> * [NFL_TABLE_ATTR_MATCHES]
> * [NFL_FIELD_REF]
> * [NFL_FIELD_REF_INSTANCE]
> * [NFL_FIELD_REF_HEADER]
> * [NFL_FIELD_REF_FIELD]
> * [NFL_FIELD_REF_MASK]
> * [NFL_FIELD_REF_TYPE]
> * [...]
> * [NFL_TABLE_ATTR_ACTIONS]
> * [NFL_ACTION_ATTR_UID]
> * [...]
> * [NFL_TABLE]
> * [...]
> *
>
> This is well-typed per Dave's comment. And because its Netlink based it
> can be easily extended as needed. The above gives basic information on
> the table. Sure it could stand to have some other entries in it but I
> never needed them for my capabilities/resource discovery. We could argue
> about removing some if they are too specific to my use cases.
I was looking for something more generic. Not quite as raw as ethtool
reg dump, but not too rigid I have to bend the def of fields to make
it work. Maybe what I want is impossible.
>> Maybe iproute2 has pretty-printers for specific switches like ethtool has for
>> reg dumps.
>>
>> I don't know about how this overlaps with DSA platform_class. Florian?
>>
>> Comments?
>
> A few other interesting (at least to me) structures you can find in the
> if_flow.h header file give how the tables in the hardware are put
> together. I found this really useful when trying to sort out where my
> various ACLs/nexthop entries/etc were in the hardware order. This is
> important to know for many cases. This is NFL_TABLE_GRAPH.
>
> Also I found being able to "know' what headers the hardware supports
> to be very useful. For example I want/need to know if QinQ will work.
> Or VXLAN/NSH, Geneve, etc. This is NFL_HEADER_GRAPH.
>
> Sure if_flow.h is a poor name and FlowAPI is probably not really great.
> But both those names come from how I model the hardware as match action
> tables. I can change those names to switchdev_resources or
> switchdev_info if folks want.
>
> My point is the interface is generic and provides a common interface for
> most hardware I've seen to date. Certainly curious if there is hardware
> it doesn't map to. Although tables and pipelines seems fairly universal
> for a large set of devices to me. Further it can be extended as needed.
>
> I'll drop the set_rule/del_rule parts to be merged with ebpf/tc/qdisc.
Ya, you're talking about tables in hardware. I just want to dump some
driver-internal driver-specific data in tabular form to user-space so
I can grep thru it and debug.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 8:17 ` Scott Feldman
@ 2015-08-27 8:41 ` Jiri Pirko
0 siblings, 0 replies; 22+ messages in thread
From: Jiri Pirko @ 2015-08-27 8:41 UTC (permalink / raw)
To: Scott Feldman; +Cc: Netdev, David S. Miller, Florian Fainelli, Roopa Prabhu
Thu, Aug 27, 2015 at 10:17:54AM CEST, sfeldma@gmail.com wrote:
>On Thu, Aug 27, 2015 at 12:27 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>> Thu, Aug 27, 2015 at 09:16:44AM CEST, sfeldma@gmail.com wrote:
>>>From: Scott Feldman <sfeldma@gmail.com>
>>>
>>>In the switchdev model, we use netdevs to represent switchdev ports, but we
>>>have no representation for the switch itself. So, introduce a new switchdev
>>>device class so we can define semantics and programming interfaces for the
>>>switch itself. Switchdev device class isn't tied to any particular bus.
>>>
>>>This patch set is just the skeleton to get us started. It adds the sysfs
>>>object registration for the new class and defines a class-level attr "foo".
>>>With the new class, we could hook PM functions, for example, to handle power
>>>transitions at the switch level. I registered rocker and get:
>>>
>>> $ ls /sys/class/switchdev/5254001235010000/
>>> foo power subsystem uevent
>>
>> No, please avoid adding anything to sysfs. If we need to add anything,
>> lets make is accesible using Netlink only.
>
>I see no harm in using the device model to define and new device class
>which just so happens to show up in sysfs. What sysfs attrs get
>exposed is where we can have some discussion/rules.
>
>>>So what next? I'd rather not build APIs around sysfs, so we need a netlink API
>>>we can build on top of this. It's not really rtnl. Maybe genl would work?
>>>What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>>>
>>>Netlink API would allow us to represent switch-wide objects such as registers,
>>>tables, stats, firmware, and maybe even control. I think with with netlink
>>>TLVs, we can create a framework for these objects but still allow the switch
>>>driver provide switch-specific info. For example, a table object:
>>>
>>>[TABLES]
>>> [TABLE]
>>> [FIELDS]
>>> [FIELD]
>>> [ID, TYPE]
>>> [DATA]
>>> [ID, VALUE]
>>
>> Alert! I feel that someone would like to abuse this iface for writing
>> configuration through. This should be read-only by design. I also think
>> that this should not be something switch-specific. I believe that NIC
>> drivers would benefit from this iface as well when they want to expose
>> something. I think we should use genl for this.
>
>Read-only is fine. Look, I'm just trying to dump rocker internal
>tables in some format I can grep outside the kernel. The tables are
>get big and complicated fast and printk doesn't cut it. I can use
>degugfs privately, but I need to be able to dump same for field
>troubleshooting. I can't use debugfs, so I want some kind of
>XML-like dump facility. It's going to have device-specific data, so
>I'm looking for an XML-like way to represent this data in netlink.
Makes sense.
>
>>>
>>>Maybe iproute2 has pretty-printers for specific switches like ethtool has for
>>>reg dumps.
>>
>> I feel like a lot of what you described overlaps with existing
>> interfaces and tools. Why don't we just reuse that? For firmware for
>> example, just take one of the ports. Same for stats (I plan to expose my
>> mlxsw switch-wide stats in ethtool so they are accessible through every
>> port netdevice).
>
>Port-centric stats are fine for port netdevs, but I'd like switch-wide
>stats to show up elsewhere.
Think about tools, infrastructure, it would get unnecessary complex fast.
I think that if we can, we should use existing infra. So far we can.
>
>Thinking ahead: I'd like to put port into namespaces and I don't want
>to dump stats on a port and see stats on ports in other namespaces.
You would not see stats of other ports, never. You would just see
switch-wide stats.
>
>> I still do not see the need for new device class. I have strong feeling
>> that it should be avoided.
>
>Ok
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:45 ` Andrew Lunn
@ 2015-08-27 8:42 ` Scott Feldman
2015-08-27 9:06 ` Andrew Lunn
0 siblings, 1 reply; 22+ messages in thread
From: Scott Feldman @ 2015-08-27 8:42 UTC (permalink / raw)
To: Andrew Lunn
Cc: Netdev, Jiří Pírko, David S. Miller,
Florian Fainelli, Roopa Prabhu
On Thu, Aug 27, 2015 at 12:45 AM, Andrew Lunn <andrew@lunn.ch> wrote:
>> I don't know about how this overlaps with DSA platform_class. Florian?
>
> There is some overlap with DSA, but the current DSA model, with
> respect to probing, is broken. So this might be interesting as a way
> towards fix that.
>
> One thing to keep in mind is the D in DSA. You talk about switch,
> singular. DSA has a number of switches in a cluster. We currently
> export a single switchdev interface for the cluster, but there are
> some properties which are per switch, e.g. temperature, eeprom
> contents, statistics, power management etc.
Export a single 'switchdev' or 'netdev' for the cluster? I hope that
was a typo. With switchdev device class, you'd instantiate one per
phy switch, and have per-switch props (temp, eeprom, etc) thru each
switchdev instance. The cluster netdev would stay a netdev which
spans the switches.
>
> Although ethtool does have options for these, it is not always a
> natural fit. ethtool --eeprom-dump on a switch port dumps the switch
> EEPROM, and all ports on the switch can be used. --register-dump on a
> port is good for showing the per ports registers, but there is no
> natural interface for showing the global switch registers. Florian's
> resent L2 interface patch shows we have issues getting access to the
> 'cpu' port, the port which interfaces to the host.
>
> We need to be careful that any new interfaces we add are better at
> representing the true structure of the hardware, which includes there
> being multiple physical switches below a switchdev, and they are
> connected together by ports which are currently not visible as
> netdevs.
Right. I'm thinking one switchdev instance (based on the new
switchdev class) per phy switch. Port netdevs for switch ports worth
exposing (for user interaction). And cluster netdev for dsa tagging
encap/decap. Do I have this right?
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 8:42 ` Scott Feldman
@ 2015-08-27 9:06 ` Andrew Lunn
2015-08-27 16:41 ` Scott Feldman
0 siblings, 1 reply; 22+ messages in thread
From: Andrew Lunn @ 2015-08-27 9:06 UTC (permalink / raw)
To: Scott Feldman
Cc: Netdev, Ji??í Pírko, David S. Miller, Florian Fainelli,
Roopa Prabhu
On Thu, Aug 27, 2015 at 01:42:24AM -0700, Scott Feldman wrote:
> On Thu, Aug 27, 2015 at 12:45 AM, Andrew Lunn <andrew@lunn.ch> wrote:
> >> I don't know about how this overlaps with DSA platform_class. Florian?
> >
> > There is some overlap with DSA, but the current DSA model, with
> > respect to probing, is broken. So this might be interesting as a way
> > towards fix that.
> >
> > One thing to keep in mind is the D in DSA. You talk about switch,
> > singular. DSA has a number of switches in a cluster. We currently
> > export a single switchdev interface for the cluster, but there are
> > some properties which are per switch, e.g. temperature, eeprom
> > contents, statistics, power management etc.
>
> Export a single 'switchdev' or 'netdev' for the cluster? I hope that
> was a typo.
I probably expressed that badly. The hardware i have on my desk has
three Marvell switches in a chain, with one end of the chain connected
to a host Ethernet interface.
>From the switchdev ops level, you don't see anything of this
chain. But some of the operations do need to be aware of this chain,
for example vlans which span multiple chips in this chain.
> With switchdev device class, you'd instantiate one per
> phy switch, and have per-switch props (temp, eeprom, etc) thru each
> switchdev instance.
O.K. This is fine, but we need people to understand that a switchdev
device class represents some middle layer in the hierarchy, not the
top layer. Otherwise false assumptions might be made.
Andrew
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:16 [RFC PATCH net-next 0/2] Add new switchdev device class sfeldma
` (4 preceding siblings ...)
2015-08-27 7:45 ` Andrew Lunn
@ 2015-08-27 13:54 ` John W. Linville
2015-08-27 23:25 ` David Miller
2015-08-28 2:13 ` Arad, Ronen
7 siblings, 0 replies; 22+ messages in thread
From: John W. Linville @ 2015-08-27 13:54 UTC (permalink / raw)
To: sfeldma; +Cc: netdev, jiri, davem, f.fainelli, roopa
On Thu, Aug 27, 2015 at 12:16:44AM -0700, sfeldma@gmail.com wrote:
> From: Scott Feldman <sfeldma@gmail.com>
>
> In the switchdev model, we use netdevs to represent switchdev ports, but we
> have no representation for the switch itself. So, introduce a new switchdev
> device class so we can define semantics and programming interfaces for the
> switch itself. Switchdev device class isn't tied to any particular bus.
>
> This patch set is just the skeleton to get us started. It adds the sysfs
> object registration for the new class and defines a class-level attr "foo".
> With the new class, we could hook PM functions, for example, to handle power
> transitions at the switch level. I registered rocker and get:
>
> $ ls /sys/class/switchdev/5254001235010000/
> foo power subsystem uevent
>
> So what next? I'd rather not build APIs around sysfs, so we need a netlink API
> we can build on top of this. It's not really rtnl. Maybe genl would work?
> What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>
> Netlink API would allow us to represent switch-wide objects such as registers,
> tables, stats, firmware, and maybe even control. I think with with netlink
> TLVs, we can create a framework for these objects but still allow the switch
> driver provide switch-specific info. For example, a table object:
>
> [TABLES]
> [TABLE]
> [FIELDS]
> [FIELD]
> [ID, TYPE]
> [DATA]
> [ID, VALUE]
>
> Maybe iproute2 has pretty-printers for specific switches like ethtool has for
> reg dumps.
>
> I don't know about how this overlaps with DSA platform_class. Florian?
>
> Comments?
I think this makes a lot of sense, for many of the reasons you cite
later in the thread. Switches are complex devices with multiple
facets that are difficult to map directly to existing abstractions
without creating artificial adaptations or leaving something out.
Giving the switch itself a representation in the device tree seems
like the right way to go.
John
--
John W. Linville Someday the world will need a hero, and you
linville@tuxdriver.com might be all we have. Be ready.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 9:06 ` Andrew Lunn
@ 2015-08-27 16:41 ` Scott Feldman
2015-08-28 11:52 ` Andrew Lunn
0 siblings, 1 reply; 22+ messages in thread
From: Scott Feldman @ 2015-08-27 16:41 UTC (permalink / raw)
To: Andrew Lunn
Cc: Netdev, Ji??í Pírko, David S. Miller, Florian Fainelli,
Roopa Prabhu
On Thu, Aug 27, 2015 at 2:06 AM, Andrew Lunn <andrew@lunn.ch> wrote:
> On Thu, Aug 27, 2015 at 01:42:24AM -0700, Scott Feldman wrote:
>> On Thu, Aug 27, 2015 at 12:45 AM, Andrew Lunn <andrew@lunn.ch> wrote:
>> >> I don't know about how this overlaps with DSA platform_class. Florian?
>> >
>> > There is some overlap with DSA, but the current DSA model, with
>> > respect to probing, is broken. So this might be interesting as a way
>> > towards fix that.
>> >
>> > One thing to keep in mind is the D in DSA. You talk about switch,
>> > singular. DSA has a number of switches in a cluster. We currently
>> > export a single switchdev interface for the cluster, but there are
>> > some properties which are per switch, e.g. temperature, eeprom
>> > contents, statistics, power management etc.
>>
>> Export a single 'switchdev' or 'netdev' for the cluster? I hope that
>> was a typo.
>
> I probably expressed that badly. The hardware i have on my desk has
> three Marvell switches in a chain, with one end of the chain connected
> to a host Ethernet interface.
>
> From the switchdev ops level, you don't see anything of this
> chain. But some of the operations do need to be aware of this chain,
> for example vlans which span multiple chips in this chain.
>
>> With switchdev device class, you'd instantiate one per
>> phy switch, and have per-switch props (temp, eeprom, etc) thru each
>> switchdev instance.
>
> O.K. This is fine, but we need people to understand that a switchdev
> device class represents some middle layer in the hierarchy, not the
> top layer. Otherwise false assumptions might be made.
So with kobj, a device can have a parent. So I experimented with my
RFC patch and changed register_switchdev to take a parent switchdev
arg, which is NULL for leaf switchdevs:
int register_switchdev(struct switchdev *sdev, const char *name,
struct switchdev *parent)
{
struct device *dev = &sdev->dev;
int err;
device_initialize(dev);
dev->class = &switchdev_class;
if (parent)
dev->parent = &parent->dev;
err = dev_set_name(dev, "%s", name);
if (err)
return err;
return device_add(dev);
}
Then I tried this with rocker and it works as expected. On module
load, I create the master switchdev, and then on PCI probe for each
phys switch dev, I put the slave switchdev in the master using
register_switchdev. Here's one slave in the master "rockers" switch:
tree /sys/class/switchdev/rockers
/sys/class/switchdev/rockers
├── 5254001235010000
│ ├── device -> ../../rocker
│ ├── foo
│ ├── power
│ │ ├── async
│ │ ├── autosuspend_delay_ms
│ │ ├── control
│ │ ├── runtime_active_kids
│ │ ├── runtime_active_time
│ │ ├── runtime_enabled
│ │ ├── runtime_status
│ │ ├── runtime_suspended_time
│ │ └── runtime_usage
│ ├── subsystem -> ../../../../../class/switchdev
│ └── uevent
├── foo
├── power
│ ├── async
│ ├── autosuspend_delay_ms
│ ├── control
│ ├── runtime_active_kids
│ ├── runtime_active_time
│ ├── runtime_enabled
│ ├── runtime_status
│ ├── runtime_suspended_time
│ └── runtime_usage
├── subsystem -> ../../../../class/switchdev
└── uevent
With this, we can stack switchdevs, I guess as high as we want. Does
this look usable for DSA? An attr set on the master would get pushed
down to the leaves. We'd can do it with the same style of recursive
algos we use for switchdev port attrs.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:16 [RFC PATCH net-next 0/2] Add new switchdev device class sfeldma
` (5 preceding siblings ...)
2015-08-27 13:54 ` John W. Linville
@ 2015-08-27 23:25 ` David Miller
2015-08-28 2:13 ` Arad, Ronen
7 siblings, 0 replies; 22+ messages in thread
From: David Miller @ 2015-08-27 23:25 UTC (permalink / raw)
To: sfeldma; +Cc: netdev, jiri, f.fainelli, roopa
From: sfeldma@gmail.com
Date: Thu, 27 Aug 2015 00:16:44 -0700
> Comments?
No fundamental objections from me.
I just want to reiterate one thing I think Jiri said.
There are other kinds of devices which make up this kind of hierarchy.
I can think of two examples involving bonafide ethernet ports.
1) A top-level parent device provides the resources for all of the RX
and TX queues, which are allocated and divided by the driver down into
the ethernet ports below. Example: niu
2) Cards are going to need more than one PCI-E slot to get all of the
PCI-E lanes necessary to saturate the link. Two PCI devices
show up and need to get probed in this scenerio and it would be
nice to have some object to represent the logical "glueing"
together of those two devices.
^ permalink raw reply [flat|nested] 22+ messages in thread
* RE: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 7:16 [RFC PATCH net-next 0/2] Add new switchdev device class sfeldma
` (6 preceding siblings ...)
2015-08-27 23:25 ` David Miller
@ 2015-08-28 2:13 ` Arad, Ronen
2015-08-28 16:55 ` Florian Fainelli
7 siblings, 1 reply; 22+ messages in thread
From: Arad, Ronen @ 2015-08-28 2:13 UTC (permalink / raw)
To: sfeldma@gmail.com, netdev@vger.kernel.org
Cc: jiri@resnulli.us, davem@davemloft.net, f.fainelli@gmail.com,
roopa@cumulusnetworks.com
>-----Original Message-----
>From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On
>Behalf Of sfeldma@gmail.com
>Sent: Thursday, August 27, 2015 12:17 AM
>To: netdev@vger.kernel.org
>Cc: jiri@resnulli.us; davem@davemloft.net; f.fainelli@gmail.com;
>roopa@cumulusnetworks.com
>Subject: [RFC PATCH net-next 0/2] Add new switchdev device class
>
>From: Scott Feldman <sfeldma@gmail.com>
>
>
>So what next? I'd rather not build APIs around sysfs, so we need a netlink
>API
>we can build on top of this. It's not really rtnl. Maybe genl would work?
>What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>
[@Ronen] I developed PoC code based on genl which allows access for what I
call device options. It generalizes libteam/team driver option handling.
It allows for fields of all Netlink scalar or string types as well as arrays.
It differentiates between port-specific and device options.
(It was not limited to read-only but this could be changed to address the
concerns raised on this thread)
Extending to Tables from just a list of named options is welcomed.
The diagram below shows possible architecture.
+-----------------------------------------+
| tool (e.g. swdevnl, iproute2) |
+-----------------------------------------+
|
+-----------------+
| libswdev |
+-----------------+
|
+-----------------------------+
| libnl3 |
+-----------------------------+
|
User |
-------------------------------------------------
Kernel |
|
+-------------------+ +-------------------+
| genetlink | | rtnetlink |
+-------------------+ +-------------------+
|
+-----------+
| swdev |
+-----------+
|
+-----------------------------------------+
| |
| SOMEswitch |
| |
+-----------------------------------------+
Libswdev in the diagram is a user space library which should abstract the
netlink interaction and encoding details from user-space tools.
Swdev is a kernel module which provides similar abstraction to drivers. It
saves drivers from most of the low level code.
Drivers register their supported options (or Table/Fields) with this module
and provide getters functions. The Swdev kernel module provides the genl API
for exporting device specific information.
This architecture allows for a generic tool to discover the information
available from each driver/port. The tool could extract sufficient
information which allows it to present user-friendly interface to users for
drilling down and retrieving specific details.
>Netlink API would allow us to represent switch-wide objects such as registers,
>tables, stats, firmware, and maybe even control. I think with with netlink
>TLVs, we can create a framework for these objects but still allow the switch
>driver provide switch-specific info. For example, a table object:
>
>[TABLES]
> [TABLE]
> [FIELDS]
> [FIELD]
> [ID, TYPE]
> [DATA]
> [ID, VALUE]
>
[@Ronen] Some additional information could be useful. TABLE name, FIELD name,
(possible also short names for CLI commands or pretty printing of table
header), FIELD value range (help with pp), TABLE/FIELD description.
>Maybe iproute2 has pretty-printers for specific switches like ethtool has for
>reg dumps.
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-27 16:41 ` Scott Feldman
@ 2015-08-28 11:52 ` Andrew Lunn
0 siblings, 0 replies; 22+ messages in thread
From: Andrew Lunn @ 2015-08-28 11:52 UTC (permalink / raw)
To: Scott Feldman
Cc: Netdev, Ji??í Pírko, David S. Miller, Florian Fainelli,
Roopa Prabhu
> So with kobj, a device can have a parent. So I experimented with my
> RFC patch and changed register_switchdev to take a parent switchdev
> arg, which is NULL for leaf switchdevs:
>
> int register_switchdev(struct switchdev *sdev, const char *name,
> struct switchdev *parent)
> {
> struct device *dev = &sdev->dev;
> int err;
>
> device_initialize(dev);
>
> dev->class = &switchdev_class;
> if (parent)
> dev->parent = &parent->dev;
>
> err = dev_set_name(dev, "%s", name);
> if (err)
> return err;
>
> return device_add(dev);
> }
...
> With this, we can stack switchdevs, I guess as high as we want. Does
> this look usable for DSA? An attr set on the master would get pushed
> down to the leaves. We'd can do it with the same style of recursive
> algos we use for switchdev port attrs.
Since this is a file system, we are limited to trees. But the hardware
is actually a graph. The interconnect points are ports on the switch,
and possible trunks/bonds of ports. I doubt there is a nice way to
represent this.
So it probably makes sense to have the switch with the port to the
host as the root of the tree, and all other switches are leafs of that
root.
How do you envisage addressing? I want to use netlink to read the
global registers from a specific switch for example.
Andrew
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [RFC PATCH net-next 0/2] Add new switchdev device class
2015-08-28 2:13 ` Arad, Ronen
@ 2015-08-28 16:55 ` Florian Fainelli
0 siblings, 0 replies; 22+ messages in thread
From: Florian Fainelli @ 2015-08-28 16:55 UTC (permalink / raw)
To: Arad, Ronen, sfeldma@gmail.com, netdev@vger.kernel.org
Cc: jiri@resnulli.us, davem@davemloft.net, roopa@cumulusnetworks.com
On 27/08/15 19:13, Arad, Ronen wrote:
>
>
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On
>> Behalf Of sfeldma@gmail.com
>> Sent: Thursday, August 27, 2015 12:17 AM
>> To: netdev@vger.kernel.org
>> Cc: jiri@resnulli.us; davem@davemloft.net; f.fainelli@gmail.com;
>> roopa@cumulusnetworks.com
>> Subject: [RFC PATCH net-next 0/2] Add new switchdev device class
>>
>> From: Scott Feldman <sfeldma@gmail.com>
>>
>>
>> So what next? I'd rather not build APIs around sysfs, so we need a netlink
>> API
>> we can build on top of this. It's not really rtnl. Maybe genl would work?
>> What ever it is, we'd need to teach iproute2 about a new 'switch' command.
>>
> [@Ronen] I developed PoC code based on genl which allows access for what I
> call device options. It generalizes libteam/team driver option handling.
> It allows for fields of all Netlink scalar or string types as well as arrays.
> It differentiates between port-specific and device options.
OpenWrt has had a similar scheme, called swconfig:
Kernel code:
https://dev.openwrt.org/browser/trunk/target/linux/generic/files/drivers/net/phy/swconfig.c
User-space tools:
https://dev.openwrt.org/browser/trunk/package/network/config/swconfig/src
This was initially proposed but was rejected in favor of what became
switchdev because it did not focus exclusively on the switch device (as
a whole piece of hardware), but rather allowed configuration of things
that were already available through bridge/vlan etc..
Having a netlink interface to interface with a global (as in, not
per-port) switch device sounds like a good idea, especially if there are
events that need to be sent back to user-space, my only major concern
with this approach is to make sure there is careful review of what goes
into this interface such that:
- it is strongly defined, not just allow sending custom
u8/u16/u32/u64/blobs attributes back and forth, but rather have a
properly defined set of commands and associated data-structures
- this always covers something that is not, by nature a switch
port/physical interface attribute, and for which there is not an
existing interface
Bottom line being that it is very easy for this interface to be a
catch-all, dumping ground of things that did not fit within existing
facilities...
> (It was not limited to read-only but this could be changed to address the
> concerns raised on this thread)
> Extending to Tables from just a list of named options is welcomed.
>
> The diagram below shows possible architecture.
>
> +-----------------------------------------+
> | tool (e.g. swdevnl, iproute2) |
> +-----------------------------------------+
> |
> +-----------------+
> | libswdev |
> +-----------------+
> |
> +-----------------------------+
> | libnl3 |
> +-----------------------------+
> |
> User |
> -------------------------------------------------
> Kernel |
> |
> +-------------------+ +-------------------+
> | genetlink | | rtnetlink |
> +-------------------+ +-------------------+
> |
> +-----------+
> | swdev |
> +-----------+
> |
> +-----------------------------------------+
> | |
> | SOMEswitch |
> | |
> +-----------------------------------------+
>
> Libswdev in the diagram is a user space library which should abstract the
> netlink interaction and encoding details from user-space tools.
>
> Swdev is a kernel module which provides similar abstraction to drivers. It
> saves drivers from most of the low level code.
>
> Drivers register their supported options (or Table/Fields) with this module
> and provide getters functions. The Swdev kernel module provides the genl API
> for exporting device specific information.
>
> This architecture allows for a generic tool to discover the information
> available from each driver/port. The tool could extract sufficient
> information which allows it to present user-friendly interface to users for
> drilling down and retrieving specific details.
>
>> Netlink API would allow us to represent switch-wide objects such as registers,
>> tables, stats, firmware, and maybe even control. I think with with netlink
>> TLVs, we can create a framework for these objects but still allow the switch
>> driver provide switch-specific info. For example, a table object:
>>
>> [TABLES]
>> [TABLE]
>> [FIELDS]
>> [FIELD]
>> [ID, TYPE]
>> [DATA]
>> [ID, VALUE]
>>
> [@Ronen] Some additional information could be useful. TABLE name, FIELD name,
> (possible also short names for CLI commands or pretty printing of table
> header), FIELD value range (help with pp), TABLE/FIELD description.
>
>> Maybe iproute2 has pretty-printers for specific switches like ethtool has for
>> reg dumps.
>>
--
Florian
^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2015-08-28 16:58 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-08-27 7:16 [RFC PATCH net-next 0/2] Add new switchdev device class sfeldma
2015-08-27 7:16 ` [RFC PATCH net-next 1/2] switchdev: create " sfeldma
2015-08-27 7:16 ` [RFC PATCH net-next 2/2] rocker: register each switch as a switchdev sfeldma
2015-08-27 7:27 ` [RFC PATCH net-next 0/2] Add new switchdev device class Jiri Pirko
2015-08-27 7:43 ` John Fastabend
2015-08-27 7:51 ` Jiri Pirko
2015-08-27 8:14 ` John Fastabend
2015-08-27 8:17 ` Scott Feldman
2015-08-27 8:41 ` Jiri Pirko
2015-08-27 7:36 ` John Fastabend
2015-08-27 7:44 ` Jiri Pirko
2015-08-27 8:09 ` John Fastabend
2015-08-27 8:23 ` Scott Feldman
2015-08-27 7:45 ` Andrew Lunn
2015-08-27 8:42 ` Scott Feldman
2015-08-27 9:06 ` Andrew Lunn
2015-08-27 16:41 ` Scott Feldman
2015-08-28 11:52 ` Andrew Lunn
2015-08-27 13:54 ` John W. Linville
2015-08-27 23:25 ` David Miller
2015-08-28 2:13 ` Arad, Ronen
2015-08-28 16:55 ` Florian Fainelli
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).