* [PATCH] net: bridge: add max_fdb_count @ 2017-11-15 19:27 Sarah Newman 2017-11-15 19:43 ` Sarah Newman ` (5 more replies) 0 siblings, 6 replies; 27+ messages in thread From: Sarah Newman @ 2017-11-15 19:27 UTC (permalink / raw) To: netdev; +Cc: Sarah Newman Current memory and CPU usage for managing bridge fdb entries is unbounded. Add a parameter max_fdb_count, controlled from sysfs, which places an upper limit on the number of entries. Defaults to 1024. When max_fdb_count is met or exceeded, whether traffic is sent out a given port should depend on its flooding behavior. This may instead be mitigated by filtering mac address entries in the PREROUTING chain of the ebtables nat table, but this is only practical when mac addresses are known in advance. Signed-off-by: Sarah Newman <srn@prgmr.com> --- net/bridge/br_device.c | 2 ++ net/bridge/br_fdb.c | 25 ++++++++++++++++++++----- net/bridge/br_private.h | 3 +++ net/bridge/br_sysfs_br.c | 24 ++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index af5b8c8..aa7a7f4 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -432,6 +432,8 @@ void br_dev_setup(struct net_device *dev) br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME; + br->max_fdb_count = 1024; + br->fdb_count = 0; dev->max_mtu = ETH_MAX_MTU; br_netfilter_rtable_init(br); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 4ea5c8b..0422d48 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -179,6 +179,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) hlist_del_init_rcu(&f->hlist); fdb_notify(br, f, RTM_DELNEIGH); + br->fdb_count -= 1; call_rcu(&f->rcu, fdb_rcu_free); } @@ -478,7 +479,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, return num; } -static struct net_bridge_fdb_entry 
*fdb_create(struct hlist_head *head, +static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, + struct hlist_head *head, struct net_bridge_port *source, const unsigned char *addr, __u16 vid, @@ -498,6 +500,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb->added_by_external_learn = 0; fdb->offloaded = 0; fdb->updated = fdb->used = jiffies; + br->fdb_count += 1; hlist_add_head_rcu(&fdb->hlist, head); } return fdb; @@ -524,7 +527,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, fdb_delete(br, fdb); } - fdb = fdb_create(head, source, addr, vid, 1, 1); + fdb = fdb_create(br, head, source, addr, vid, 1, 1); if (!fdb) return -ENOMEM; @@ -556,6 +559,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, if (hold_time(br) == 0) return; + /* Place maximum on number of learned entries. */ + if (br->max_fdb_count <= br->fdb_count) + return; + /* ignore packets unless we are using this port */ if (!(source->state == BR_STATE_LEARNING || source->state == BR_STATE_FORWARDING)) @@ -591,7 +598,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } else { spin_lock(&br->hash_lock); if (likely(!fdb_find_rcu(head, addr, vid))) { - fdb = fdb_create(head, source, addr, vid, 0, 0); + fdb = fdb_create(br, head, source, addr, vid, 0, 0); if (fdb) { if (unlikely(added_by_user)) fdb->added_by_user = 1; @@ -787,7 +794,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(head, source, addr, vid, 0, 0); + fdb = fdb_create(br, head, source, addr, vid, 0, 0); if (!fdb) return -ENOMEM; @@ -1081,7 +1088,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, head = &br->hash[br_mac_hash(addr, vid)]; fdb = br_fdb_find(br, addr, vid); if (!fdb) { - fdb = fdb_create(head, p, addr, vid, 0, 0); + fdb = fdb_create(br, head, p, addr, vid, 0, 0); if (!fdb) 
{ err = -ENOMEM; goto err_unlock; @@ -1147,3 +1154,11 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, spin_unlock_bh(&br->hash_lock); } + +int br_set_max_fdb_count(struct net_bridge *br, unsigned long max_fdb_count) +{ + spin_lock_bh(&br->lock); + br->max_fdb_count = max_fdb_count; + spin_unlock_bh(&br->lock); + return 0; +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1312b8d..98aae1f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -343,6 +343,8 @@ struct net_bridge { unsigned long bridge_hello_time; unsigned long bridge_forward_delay; unsigned long bridge_ageing_time; + unsigned long max_fdb_count; + unsigned long fdb_count; u8 group_addr[ETH_ALEN]; bool group_addr_set; @@ -549,6 +551,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); +int br_set_max_fdb_count(struct net_bridge *br, unsigned long x); /* br_forward.c */ enum br_pkt_type { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 723f25e..18fabdf 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -335,6 +335,28 @@ static ssize_t flush_store(struct device *d, } static DEVICE_ATTR_WO(flush); +static ssize_t max_fdb_count_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", br->max_fdb_count); +} + +static ssize_t max_fdb_count_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_set_max_fdb_count); +} +static DEVICE_ATTR_RW(max_fdb_count); + +static ssize_t fdb_count_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", br->fdb_count); +} +static 
DEVICE_ATTR_RO(fdb_count); + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t multicast_router_show(struct device *d, struct device_attribute *attr, char *buf) @@ -830,6 +852,8 @@ static ssize_t vlan_stats_enabled_store(struct device *d, &dev_attr_gc_timer.attr, &dev_attr_group_addr.attr, &dev_attr_flush.attr, + &dev_attr_max_fdb_count.attr, + &dev_attr_fdb_count.attr, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &dev_attr_multicast_router.attr, &dev_attr_multicast_snooping.attr, -- 1.9.1 ^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman @ 2017-11-15 19:43 ` Sarah Newman 2017-11-15 20:04 ` Stephen Hemminger ` (4 subsequent siblings) 5 siblings, 0 replies; 27+ messages in thread From: Sarah Newman @ 2017-11-15 19:43 UTC (permalink / raw) To: netdev On 11/15/2017 11:27 AM, Sarah Newman wrote: > Current memory and CPU usage for managing bridge fdb entries is unbounded. > Add a parameter max_fdb_count, controlled from sysfs, which places an upper > limit on the number of entries. Defaults to 1024. > > When max_fdb_count is met or exceeded, whether traffic is sent out a > given port should depend on its flooding behavior. > > This may instead be mitigated by filtering mac address entries in the > PREROUTING chain of the ebtables nat table, but this is only practical > when mac addresses are known in advance. > > Signed-off-by: Sarah Newman <srn@prgmr.com> I would love to improve this patch, but have limited time to devote to this... What I would try first would be to maintain a data structure roughly ordered based on both number of times an address was observed as well as age and evict the least used, oldest entry when max_fdb_count was reached. --Sarah ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman 2017-11-15 19:43 ` Sarah Newman @ 2017-11-15 20:04 ` Stephen Hemminger 2017-11-16 2:25 ` Andrew Lunn 2017-11-15 21:34 ` Egil Hjelmeland ` (3 subsequent siblings) 5 siblings, 1 reply; 27+ messages in thread From: Stephen Hemminger @ 2017-11-15 20:04 UTC (permalink / raw) To: Sarah Newman; +Cc: netdev On Wed, 15 Nov 2017 11:27:07 -0800 Sarah Newman <srn@prgmr.com> wrote: > Current memory and CPU usage for managing bridge fdb entries is unbounded. > Add a parameter max_fdb_count, controlled from sysfs, which places an upper > limit on the number of entries. Defaults to 1024. > > When max_fdb_count is met or exceeded, whether traffic is sent out a > given port should depend on its flooding behavior. > > This may instead be mitigated by filtering mac address entries in the > PREROUTING chain of the ebtables nat table, but this is only practical > when mac addresses are known in advance. > > Signed-off-by: Sarah Newman <srn@prgmr.com> Thanks for your patch, I can see how this can be a real problem, especially for a learning bridge. There are some details which probably need to be resolved before this can be applied. * if the bridge is being DoS attacked by random MAC addresses then the policy under spam needs to be considered. Not sure if not inserting the new entry is the best policy. * The counter probably doesn't need to be unsigned long (64 bit on x86_64) maybe u32 is enough. * The bridge attributes in general are controllable both by netlink and sysfs. It would make sense to do this for max fdb as well. Also what do the vendors using bridge for L2 offload to switch think? Many switches need to clone table, and similar limits must be in other versions. FreeBSD has a limit like this. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-15 20:04 ` Stephen Hemminger @ 2017-11-16 2:25 ` Andrew Lunn 2017-11-16 4:05 ` Toshiaki Makita 0 siblings, 1 reply; 27+ messages in thread From: Andrew Lunn @ 2017-11-16 2:25 UTC (permalink / raw) To: Stephen Hemminger; +Cc: Sarah Newman, netdev > Also what do the vendors using bridge for L2 offload to switch think? The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So maybe 1024 is a bit low? How big is an FDB entry? Even an OpenWRT/LEDE class device with 512MB RAM can probably support 1MB of RAM for FDB entries. > Many switches need to clone table, and similar limits must be in other versions. DSA does not maintain synchronisation between the hardware tables and the software tables. So no cloning. We expect both bridges to perform learning based on the frames. This means we should be able to handle one bridge flooding because its table is full, while the other has an entry and forwards out the correct port. Andrew ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 2:25 ` Andrew Lunn @ 2017-11-16 4:05 ` Toshiaki Makita 2017-11-16 4:54 ` Sarah Newman 0 siblings, 1 reply; 27+ messages in thread From: Toshiaki Makita @ 2017-11-16 4:05 UTC (permalink / raw) To: Andrew Lunn, Stephen Hemminger, Sarah Newman; +Cc: netdev On 2017/11/16 11:25, Andrew Lunn wrote: >> Also what do the vendors using bridge for L2 offload to switch think? > > The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So > maybe 1024 is a bit low? How about U32_MAX by default since it is currently not restricted. (assuming the field will be changed to u32 as per Stephen's feedback). Otherwise users may suffer from unexpected behavior change by updating kernel? -- Toshiaki Makita ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 4:05 ` Toshiaki Makita @ 2017-11-16 4:54 ` Sarah Newman 2017-11-16 6:13 ` Toshiaki Makita 0 siblings, 1 reply; 27+ messages in thread From: Sarah Newman @ 2017-11-16 4:54 UTC (permalink / raw) To: Toshiaki Makita, Andrew Lunn, Stephen Hemminger; +Cc: netdev On 11/15/2017 08:05 PM, Toshiaki Makita wrote: > On 2017/11/16 11:25, Andrew Lunn wrote: >>> Also what do the vendors using bridge for L2 offload to switch think? >> >> The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So >> maybe 1024 is a bit low? > > How about U32_MAX by default since it is currently not restricted. > (assuming the field will be changed to u32 as per Stephen's feedback). > > Otherwise users may suffer from unexpected behavior change by updating > kernel? > U32_MAX seems like much too high a default to be helpful to a typical user. How many devices are realistically on a single bridge in the wild? Double that seems like a reasonable default. Additionally, since the exact limit seems controversial, it can be made a configuration parameter. What about the rest? --Sarah ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 4:54 ` Sarah Newman @ 2017-11-16 6:13 ` Toshiaki Makita 2017-11-16 6:20 ` Roopa Prabhu 0 siblings, 1 reply; 27+ messages in thread From: Toshiaki Makita @ 2017-11-16 6:13 UTC (permalink / raw) To: Sarah Newman, Andrew Lunn, Stephen Hemminger; +Cc: netdev On 2017/11/16 13:54, Sarah Newman wrote: > On 11/15/2017 08:05 PM, Toshiaki Makita wrote: >> On 2017/11/16 11:25, Andrew Lunn wrote: >>>> Also what do the vendors using bridge for L2 offload to switch think? >>> >>> The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So >>> maybe 1024 is a bit low? >> >> How about U32_MAX by default since it is currently not restricted. >> (assuming the field will be changed to u32 as per Stephen's feedback). >> >> Otherwise users may suffer from unexpected behavior change by updating >> kernel? >> > > U32_MAX seems like much too high a default to be helpful to a typical user. How many devices are realistically on a single bridge in the wild? Double > that seems like a reasonable default. I'm suggesting the most unrealistic number to essentially disable the restriction by default. My understanding is that we put a priority on not to break existing users even if the new restriction looks reasonable for most people. -- Toshiaki Makita ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 6:13 ` Toshiaki Makita @ 2017-11-16 6:20 ` Roopa Prabhu 2017-11-16 16:54 ` Stephen Hemminger 0 siblings, 1 reply; 27+ messages in thread From: Roopa Prabhu @ 2017-11-16 6:20 UTC (permalink / raw) To: Toshiaki Makita; +Cc: Sarah Newman, Andrew Lunn, Stephen Hemminger, netdev On Wed, Nov 15, 2017 at 10:13 PM, Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote: > On 2017/11/16 13:54, Sarah Newman wrote: >> On 11/15/2017 08:05 PM, Toshiaki Makita wrote: >>> On 2017/11/16 11:25, Andrew Lunn wrote: >>>>> Also what do the vendors using bridge for L2 offload to switch think? >>>> >>>> The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So >>>> maybe 1024 is a bit low? >>> >>> How about U32_MAX by default since it is currently not restricted. >>> (assuming the field will be changed to u32 as per Stephen's feedback). >>> >>> Otherwise users may suffer from unexpected behavior change by updating >>> kernel? >>> >> >> U32_MAX seems like much too high a default to be helpful to a typical user. How many devices are realistically on a single bridge in the wild? Double >> that seems like a reasonable default. > > I'm suggesting the most unrealistic number to essentially disable the > restriction by default. > My understanding is that we put a priority on not to break existing > users even if the new restriction looks reasonable for most people. +1 , and yes, 1024 is very low. some of the switches we use support around 128K FDB entries and we have seen that number increase fairly quickly in newer generation switches. Default should be no limit to not break existing users. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 6:20 ` Roopa Prabhu @ 2017-11-16 16:54 ` Stephen Hemminger 0 siblings, 0 replies; 27+ messages in thread From: Stephen Hemminger @ 2017-11-16 16:54 UTC (permalink / raw) To: Roopa Prabhu; +Cc: Toshiaki Makita, Sarah Newman, Andrew Lunn, netdev On Wed, 15 Nov 2017 22:20:23 -0800 Roopa Prabhu <roopa@cumulusnetworks.com> wrote: > On Wed, Nov 15, 2017 at 10:13 PM, Toshiaki Makita > <makita.toshiaki@lab.ntt.co.jp> wrote: > > On 2017/11/16 13:54, Sarah Newman wrote: > >> On 11/15/2017 08:05 PM, Toshiaki Makita wrote: > >>> On 2017/11/16 11:25, Andrew Lunn wrote: > >>>>> Also what do the vendors using bridge for L2 offload to switch think? > >>>> > >>>> The Marvell L2 switches which DSA supports have 8K FDB/MDB entries. So > >>>> maybe 1024 is a bit low? > >>> > >>> How about U32_MAX by default since it is currently not restricted. > >>> (assuming the field will be changed to u32 as per Stephen's feedback). > >>> > >>> Otherwise users may suffer from unexpected behavior change by updating > >>> kernel? > >>> > >> > >> U32_MAX seems like much too high a default to be helpful to a typical user. How many devices are realistically on a single bridge in the wild? Double > >> that seems like a reasonable default. > > > > I'm suggesting the most unrealistic number to essentially disable the > > restriction by default. > > My understanding is that we put a priority on not to break existing > > users even if the new restriction looks reasonable for most people. > > +1 , and yes, 1024 is very low. some of the switches we use support > around 128K FDB entries and we have seen that number increase fairly > quickly in newer generation switches. Default should be no limit to > not break existing users. New features can not break existing users. My recommendation would be that 0 be used as a magic value to indicate no limit and that would be the default. 
Also the limit should be controllable on a per port of bridge (interface) basis. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman 2017-11-15 19:43 ` Sarah Newman 2017-11-15 20:04 ` Stephen Hemminger @ 2017-11-15 21:34 ` Egil Hjelmeland 2017-11-16 3:01 ` Andrew Lunn ` (2 subsequent siblings) 5 siblings, 0 replies; 27+ messages in thread From: Egil Hjelmeland @ 2017-11-15 21:34 UTC (permalink / raw) To: Sarah Newman, netdev Den 15. nov. 2017 20:27, skrev Sarah Newman: > Current memory and CPU usage for managing bridge fdb entries is unbounded. > Add a parameter max_fdb_count, controlled from sysfs, which places an upper > limit on the number of entries. Defaults to 1024. > > When max_fdb_count is met or exceeded, whether traffic is sent out a > given port should depend on its flooding behavior. > > This may instead be mitigated by filtering mac address entries in the > PREROUTING chain of the ebtables nat table, but this is only practical > when mac addresses are known in advance. > > Signed-off-by: Sarah Newman <srn@prgmr.com> > --- > net/bridge/br_device.c | 2 ++ > net/bridge/br_fdb.c | 25 ++++++++++++++++++++----- > net/bridge/br_private.h | 3 +++ > net/bridge/br_sysfs_br.c | 24 ++++++++++++++++++++++++ > 4 files changed, 49 insertions(+), 5 deletions(-) > > diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c > index 723f25e..18fabdf 100644 > --- a/net/bridge/br_sysfs_br.c > +++ b/net/bridge/br_sysfs_br.c > @@ -335,6 +335,28 @@ static ssize_t flush_store(struct device *d, > } > static DEVICE_ATTR_WO(flush); > > +static ssize_t max_fdb_count_show(struct device *d, struct device_attribute *attr, > + char *buf) > +{ > + struct net_bridge *br = to_bridge(d); > + return sprintf(buf, "%lu\n", br->max_fdb_count); > +} > + > +static ssize_t max_fdb_count_store(struct device *d, struct device_attribute *attr, > + const char *buf, size_t len) > +{ > + return store_bridge_parm(d, buf, len, br_set_max_fdb_count); > +} > +static DEVICE_ATTR_RW(max_fdb_count); > + > +static 
ssize_t fdb_count_show(struct device *d, struct device_attribute *attr, > + char *buf) > +{ > + struct net_bridge *br = to_bridge(d); > + return sprintf(buf, "%lu\n", br->fdb_count); > +} > +static DEVICE_ATTR_RO(fdb_count); > + > #ifdef CONFIG_BRIDGE_IGMP_SNOOPING > static ssize_t multicast_router_show(struct device *d, > struct device_attribute *attr, char *buf) > @@ -830,6 +852,8 @@ static ssize_t vlan_stats_enabled_store(struct device *d, > &dev_attr_gc_timer.attr, > &dev_attr_group_addr.attr, > &dev_attr_flush.attr, > + &dev_attr_max_fdb_count.attr, > + &dev_attr_fdb_count.attr, > #ifdef CONFIG_BRIDGE_IGMP_SNOOPING > &dev_attr_multicast_router.attr, > &dev_attr_multicast_snooping.attr, > Documentation/filesystems/sysfs.txt: All new sysfs attributes must be documented in Documentation/ABI. See also Documentation/ABI/README for more information. Egil ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman ` (2 preceding siblings ...) 2017-11-15 21:34 ` Egil Hjelmeland @ 2017-11-16 3:01 ` Andrew Lunn 2017-11-16 7:31 ` Nikolay Aleksandrov 2017-11-21 14:53 ` David Laight 5 siblings, 0 replies; 27+ messages in thread From: Andrew Lunn @ 2017-11-16 3:01 UTC (permalink / raw) To: Sarah Newman; +Cc: netdev > @@ -556,6 +559,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, > if (hold_time(br) == 0) > return; > > + /* Place maximum on number of learned entries. */ > + if (br->max_fdb_count <= br->fdb_count) > + return; > + Hi Sarah We probably want a rate limited warning printed here. If the table is full, it will have to start flooding frames, which is not good for performance. Having something in the log will help debug the problem. The log will also give a hit that a DoS attack could be happening. Andrew ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman ` (3 preceding siblings ...) 2017-11-16 3:01 ` Andrew Lunn @ 2017-11-16 7:31 ` Nikolay Aleksandrov 2017-11-16 9:20 ` Sarah Newman 2017-11-21 14:53 ` David Laight 5 siblings, 1 reply; 27+ messages in thread From: Nikolay Aleksandrov @ 2017-11-16 7:31 UTC (permalink / raw) To: Sarah Newman, netdev; +Cc: roopa On 15/11/17 21:27, Sarah Newman wrote: > Current memory and CPU usage for managing bridge fdb entries is unbounded. > Add a parameter max_fdb_count, controlled from sysfs, which places an upper > limit on the number of entries. Defaults to 1024. > > When max_fdb_count is met or exceeded, whether traffic is sent out a > given port should depend on its flooding behavior. > > This may instead be mitigated by filtering mac address entries in the > PREROUTING chain of the ebtables nat table, but this is only practical > when mac addresses are known in advance. > One alternative solution: if limit is the only requirement it could be done in user-space (even with a shell script) looking at fdb notifications and if you reach some limit then remove the learning flag from ports, later if enough expire turn it back on. In fact you can make any policy and if you catch an offending port - you can disable only its learning flag and leave the rest. > Signed-off-by: Sarah Newman <srn@prgmr.com> > --- > net/bridge/br_device.c | 2 ++ > net/bridge/br_fdb.c | 25 ++++++++++++++++++++----- > net/bridge/br_private.h | 3 +++ > net/bridge/br_sysfs_br.c | 24 ++++++++++++++++++++++++ > 4 files changed, 49 insertions(+), 5 deletions(-) > As others have mentioned placing a default limit would break existing users, so this must be a config option that is off (or very large) by default. 
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c > index af5b8c8..aa7a7f4 100644 > --- a/net/bridge/br_device.c > +++ b/net/bridge/br_device.c > @@ -432,6 +432,8 @@ void br_dev_setup(struct net_device *dev) > br->bridge_hello_time = br->hello_time = 2 * HZ; > br->bridge_forward_delay = br->forward_delay = 15 * HZ; > br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME; > + br->max_fdb_count = 1024;> + br->fdb_count = 0; No need to zero this explicitly, it is already zero. > dev->max_mtu = ETH_MAX_MTU; > > br_netfilter_rtable_init(br); > diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c > index 4ea5c8b..0422d48 100644 > --- a/net/bridge/br_fdb.c > +++ b/net/bridge/br_fdb.c > @@ -179,6 +179,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) > > hlist_del_init_rcu(&f->hlist); > fdb_notify(br, f, RTM_DELNEIGH); > + br->fdb_count -= 1;> call_rcu(&f->rcu, fdb_rcu_free); > } > > @@ -478,7 +479,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, > return num; > } > > -static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, > +static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, > + struct hlist_head *head, > struct net_bridge_port *source, > const unsigned char *addr, > __u16 vid, > @@ -498,6 +500,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, > fdb->added_by_external_learn = 0; > fdb->offloaded = 0; > fdb->updated = fdb->used = jiffies; > + br->fdb_count += 1; > hlist_add_head_rcu(&fdb->hlist, head); > } > return fdb; > @@ -524,7 +527,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, > fdb_delete(br, fdb); > } > > - fdb = fdb_create(head, source, addr, vid, 1, 1); > + fdb = fdb_create(br, head, source, addr, vid, 1, 1); > if (!fdb) > return -ENOMEM; > > @@ -556,6 +559,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, > if (hold_time(br) == 0) > return; > > + /* Place maximum on 
number of learned entries. */ > + if (br->max_fdb_count <= br->fdb_count) > + return; > + checking for fdb_count in the beginning of br_fdb_update would cause stale fdb entries, that is fdb moves between ports will stop and fdb updates will stop too (i.e. everything expires) > /* ignore packets unless we are using this port */ > if (!(source->state == BR_STATE_LEARNING || > source->state == BR_STATE_FORWARDING)) > @@ -591,7 +598,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, > } else { > spin_lock(&br->hash_lock); > if (likely(!fdb_find_rcu(head, addr, vid))) { > - fdb = fdb_create(head, source, addr, vid, 0, 0); > + fdb = fdb_create(br, head, source, addr, vid, 0, 0); > if (fdb) { > if (unlikely(added_by_user)) > fdb->added_by_user = 1; > @@ -787,7 +794,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, > if (!(flags & NLM_F_CREATE)) > return -ENOENT; > > - fdb = fdb_create(head, source, addr, vid, 0, 0); > + fdb = fdb_create(br, head, source, addr, vid, 0, 0); > if (!fdb) > return -ENOMEM; > > @@ -1081,7 +1088,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, > head = &br->hash[br_mac_hash(addr, vid)]; > fdb = br_fdb_find(br, addr, vid); > if (!fdb) { > - fdb = fdb_create(head, p, addr, vid, 0, 0); > + fdb = fdb_create(br, head, p, addr, vid, 0, 0); > if (!fdb) { > err = -ENOMEM; > goto err_unlock; > @@ -1147,3 +1154,11 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, > > spin_unlock_bh(&br->hash_lock); > } > + > +int br_set_max_fdb_count(struct net_bridge *br, unsigned long max_fdb_count) > +{ > + spin_lock_bh(&br->lock); > + br->max_fdb_count = max_fdb_count; > + spin_unlock_bh(&br->lock); No need to take the spin lock here as this value is checked without it anyway, and all bridge dev option changes are done under rtnl lock already. 
> + return 0; > +} > diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h > index 1312b8d..98aae1f 100644 > --- a/net/bridge/br_private.h > +++ b/net/bridge/br_private.h > @@ -343,6 +343,8 @@ struct net_bridge { > unsigned long bridge_hello_time; > unsigned long bridge_forward_delay; > unsigned long bridge_ageing_time; > + unsigned long max_fdb_count; > + unsigned long fdb_count;> > u8 group_addr[ETH_ALEN]; > bool group_addr_set; > @@ -549,6 +551,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, > const unsigned char *addr, u16 vid); > void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, > const unsigned char *addr, u16 vid); > +int br_set_max_fdb_count(struct net_bridge *br, unsigned long x); > > /* br_forward.c */ > enum br_pkt_type { > diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c > index 723f25e..18fabdf 100644 > --- a/net/bridge/br_sysfs_br.c > +++ b/net/bridge/br_sysfs_br.c > @@ -335,6 +335,28 @@ static ssize_t flush_store(struct device *d, > } > static DEVICE_ATTR_WO(flush); > > +static ssize_t max_fdb_count_show(struct device *d, struct device_attribute *attr, > + char *buf) > +{ > + struct net_bridge *br = to_bridge(d); > + return sprintf(buf, "%lu\n", br->max_fdb_count); > +} > + > +static ssize_t max_fdb_count_store(struct device *d, struct device_attribute *attr, > + const char *buf, size_t len) > +{ > + return store_bridge_parm(d, buf, len, br_set_max_fdb_count); > +} > +static DEVICE_ATTR_RW(max_fdb_count); > + > +static ssize_t fdb_count_show(struct device *d, struct device_attribute *attr, > + char *buf) > +{ > + struct net_bridge *br = to_bridge(d); > + return sprintf(buf, "%lu\n", br->fdb_count); > +} > +static DEVICE_ATTR_RO(fdb_count); > + > #ifdef CONFIG_BRIDGE_IGMP_SNOOPING > static ssize_t multicast_router_show(struct device *d, > struct device_attribute *attr, char *buf) > @@ -830,6 +852,8 @@ static ssize_t vlan_stats_enabled_store(struct device *d, > 
&dev_attr_gc_timer.attr, > &dev_attr_group_addr.attr, > &dev_attr_flush.attr, > + &dev_attr_max_fdb_count.attr, > + &dev_attr_fdb_count.attr, > #ifdef CONFIG_BRIDGE_IGMP_SNOOPING > &dev_attr_multicast_router.attr, > &dev_attr_multicast_snooping.attr, > ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 7:31 ` Nikolay Aleksandrov @ 2017-11-16 9:20 ` Sarah Newman 2017-11-16 9:49 ` Nikolay Aleksandrov 2017-11-16 9:58 ` Willy Tarreau 0 siblings, 2 replies; 27+ messages in thread From: Sarah Newman @ 2017-11-16 9:20 UTC (permalink / raw) To: Nikolay Aleksandrov, netdev; +Cc: roopa On 11/15/2017 11:31 PM, Nikolay Aleksandrov wrote: > On 15/11/17 21:27, Sarah Newman wrote: >> Current memory and CPU usage for managing bridge fdb entries is unbounded. >> Add a parameter max_fdb_count, controlled from sysfs, which places an upper >> limit on the number of entries. Defaults to 1024. >> >> When max_fdb_count is met or exceeded, whether traffic is sent out a >> given port should depend on its flooding behavior. >> >> This may instead be mitigated by filtering mac address entries in the >> PREROUTING chain of the ebtables nat table, but this is only practical >> when mac addresses are known in advance. >> > > One alternative solution: if limit is the only requirement it could be done > in user-space (even with a shell script) looking at fdb notifications and > if you reach some limit then remove the learning flag from ports, later if > enough expire turn it back on. In fact you can make any policy and if you > catch an offending port - you can disable only its learning flag and leave the > rest. Leaving such a trivial DOS in the kernel doesn't seem like a good idea to me, but I suppose it hasn't bothered anyone else up to now except this person back in 2013: https://www.keypressure.com/blog/linux-bridge-port-security/ I note that anyone who would run up against a too-low limit on the maximum number of fdb entries would also be savvy enough to fix it in a matter of minutes. They could also default the limit to U32_MAX in their particular distribution if it was a configuration option. At the moment there is not even a single log message if the problem doesn't result in memory exhaustion. 
--Sarah ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 9:20 ` Sarah Newman @ 2017-11-16 9:49 ` Nikolay Aleksandrov 2017-11-16 9:58 ` Willy Tarreau 1 sibling, 0 replies; 27+ messages in thread From: Nikolay Aleksandrov @ 2017-11-16 9:49 UTC (permalink / raw) To: Sarah Newman, netdev; +Cc: roopa On 16/11/17 11:20, Sarah Newman wrote: > On 11/15/2017 11:31 PM, Nikolay Aleksandrov wrote: >> On 15/11/17 21:27, Sarah Newman wrote: >>> Current memory and CPU usage for managing bridge fdb entries is unbounded. >>> Add a parameter max_fdb_count, controlled from sysfs, which places an upper >>> limit on the number of entries. Defaults to 1024. >>> >>> When max_fdb_count is met or exceeded, whether traffic is sent out a >>> given port should depend on its flooding behavior. >>> >>> This may instead be mitigated by filtering mac address entries in the >>> PREROUTING chain of the ebtables nat table, but this is only practical >>> when mac addresses are known in advance. >>> >> >> One alternative solution: if limit is the only requirement it could be done >> in user-space (even with a shell script) looking at fdb notifications and >> if you reach some limit then remove the learning flag from ports, later if >> enough expire turn it back on. In fact you can make any policy and if you >> catch an offending port - you can disable only its learning flag and leave the >> rest. > > Leaving such a trivial DOS in the kernel doesn't seem like a good idea to me, > but I suppose it hasn't bothered anyone else up to now except this person > back in 2013: https://www.keypressure.com/blog/linux-bridge-port-security/ > Having L2 access means many more things are possible, that being said I'm not against adding the limit just need to make it optional with the default either being no limit or a very high one (e.g. u32 max). > I note that anyone who would run up against a too-low limit on the maximum > number of fdb entries would also be savvy enough to fix it in a matter of > minutes. 
They could also default the limit to U32_MAX in their particular > distribution if it was a configuration option. The same argument applies to anyone who wants to limit their fdbs, and the standard way to add new options in the Linux bridge which affect normal operations is to keep the current behaviour and enable the new one on user request in order to avoid breaking already existing setups. In fact we try very hard to keep existing setups running. > > At the moment there is not even a single log message if the problem doesn't > result in memory exhaustion. > > --Sarah > ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 9:20 ` Sarah Newman 2017-11-16 9:49 ` Nikolay Aleksandrov @ 2017-11-16 9:58 ` Willy Tarreau 2017-11-16 18:23 ` Sarah Newman 1 sibling, 1 reply; 27+ messages in thread From: Willy Tarreau @ 2017-11-16 9:58 UTC (permalink / raw) To: Sarah Newman; +Cc: Nikolay Aleksandrov, netdev, roopa Hi Sarah, On Thu, Nov 16, 2017 at 01:20:18AM -0800, Sarah Newman wrote: > I note that anyone who would run up against a too-low limit on the maximum > number of fdb entries would also be savvy enough to fix it in a matter of > minutes. I disagree on this point. There's a huge difference between experiencing sudden breakage under normal conditions due to arbitrary limits being set and being down because of an attack. While the latter is not desirable, it's much more easily accepted and most often requires operations anyway. The former is never an option. And I continue to think that the default behaviour once the limit is reached must not be to prevent new entries from being learned but to purge older ones. At least it preserves normal operations. But given the high CPU impact you reported for a very low load, definitely something needs to be done. > They could also default the limit to U32_MAX in their particular > distribution if it was a configuration option. Well, I'd say that we don't have a default limit on the socket number either and that it happens to be the expected behaviour. It's almost impossible to find a suitable limit for everyone. People dealing with regular loads never read docs and get caught. People working in hostile environments are always more careful and will ensure that their limits are properly set. > At the moment there is not even a single log message if the problem doesn't > result in memory exhaustion. This probably needs to be addressed as well! Regards, Willy ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 9:58 ` Willy Tarreau @ 2017-11-16 18:23 ` Sarah Newman 2017-11-16 19:23 ` Andrew Lunn 0 siblings, 1 reply; 27+ messages in thread From: Sarah Newman @ 2017-11-16 18:23 UTC (permalink / raw) To: Willy Tarreau; +Cc: Nikolay Aleksandrov, netdev, roopa On 11/16/2017 01:58 AM, Willy Tarreau wrote: > Hi Sarah, > > On Thu, Nov 16, 2017 at 01:20:18AM -0800, Sarah Newman wrote: >> I note that anyone who would run up against a too-low limit on the maximum >> number of fdb entries would also be savvy enough to fix it in a matter of >> minutes. > > I disagree on this point. There's a huge difference between experiencing > sudden breakage under normal conditions due to arbitrary limits being set > and being down because of an attack. While the latter is not desirable, > it's much more easily accepted and most often requires operations anyway. > The former is never an option. Yes, being down during an attack is expected, assuming you know you are being attacked. Linux bridges can also be used in small embedded devices. With no limit, the likely result from those devices being attacked is the device gets thrown away for being unreliable. > > And I continue to think that the default behaviour once the limit is reached > must not be to prevent new entries from being learned but to purge older > ones. At least it preserves normal operations. I'm not disagreeing. I spent maybe a couple of hours on this patch and was hoping someone else would find more time to spend on the problem. > > But given the high CPU impact you reported for a very low load, definitely > something needs to be done. It's nice to think so. > >> They could also default the limit to U32_MAX in their particular >> distribution if it was a configuration option. > > Well, I'd say that we don't have a default limit on the socket number either > and that it happens to be the expected behaviour. It's almost impossible to > find a suitable limit for everyone. 
People dealing with regular loads never > read docs and get caught. People working in hostile environments are always > more careful and will ensure that their limits are properly set. Neighbor tables for ipv4/ipv6 seem more comparable. gc_thresh3 is 1024 and typically needs to be adjusted higher for Linux routers. As you say, there is no default limit that suits everyone. So the question is really who is burdened with changing the default. There is a lot of talk of not breaking existing users. The current implementation is demonstrably vulnerable, and since the problem is likely silent there's not a good way to know how often it's actually occurred. I note the tool to trigger it is trivially available and it's a well-known type of attack. But I understand if there have been an insufficient known number of attacks to change the current default situation. It could be left to user space to make a new default if it becomes a demonstrable problem. If user space has to change at all you could again argue for a pure user-space solution, but I'm not sure if a pure user-space solution would always have a chance to fix the problem before the system was brought down. > >> At the moment there is not even a single log message if the problem doesn't >> result in memory exhaustion. > > This probably needs to be addressed as well Maybe what's needed is two thresholds, one for warning and one for enforcement. The warning limit would need to be low enough that the information had a good chance of being logged before the system was under too much load to be able to convey that information. The enforcement limit could be left as default inactive until shown that it needed to be otherwise. --Sarah ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 18:23 ` Sarah Newman @ 2017-11-16 19:23 ` Andrew Lunn 2017-11-16 19:36 ` Nikolay Aleksandrov 2017-11-16 20:21 ` Vincent Bernat 0 siblings, 2 replies; 27+ messages in thread From: Andrew Lunn @ 2017-11-16 19:23 UTC (permalink / raw) To: Sarah Newman; +Cc: Willy Tarreau, Nikolay Aleksandrov, netdev, roopa > Linux bridges can also be used in small embedded devices. With no limit, > the likely result from those devices being attacked is the device gets > thrown away for being unreliable. Hi Sarah Just to get a gut feeling... struct net_bridge_fdb_entry is 40 bytes. My WiFi access point which is also a 5 port bridge, currently has 97MB free RAM. That is space for about 2.5M FDB entries. So even Roopa's 128K is not really a problem, in terms of memory. > Maybe what's needed is two thresholds, one for warning and one for enforcement. > The warning limit would need to be low enough that the information had a good chance > of being logged before the system was under too much load to be able to convey > that information. The enforcement limit could be left as default inactive until > shown that it needed to be otherwise. What exactly is the problem here? Does the DoS exhaust memory, or does the hashing algorithm not scale? It is more work, but the table could be more closely tied to the memory management code. When memory is getting low, callbacks are made asking to free up memory. Register such a callback and throw away part of the table when memory is getting low. There is then no need to limit the size, but print a rate limited warning when asked to reduce the size. Andrew ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 19:23 ` Andrew Lunn @ 2017-11-16 19:36 ` Nikolay Aleksandrov 2017-11-16 20:54 ` Sarah Newman 2017-11-16 20:21 ` Vincent Bernat 1 sibling, 1 reply; 27+ messages in thread From: Nikolay Aleksandrov @ 2017-11-16 19:36 UTC (permalink / raw) To: Andrew Lunn, Sarah Newman; +Cc: Willy Tarreau, netdev, roopa On 16 November 2017 21:23:25 EET, Andrew Lunn <andrew@lunn.ch> wrote: >> Linux bridges can also be used in small embedded devices. With no >limit, >> the likely result from those devices being attacked is the device >gets >> thrown away for being unreliable. > >Hi Sarah > >Just to get a gut feeling... > >struct net_bridge_fdb_entry is 40 bytes. > >My WiFi access point which is also a 5 port bridge, currently has 97MB >free RAM. That is space for about 2.5M FDB entries. So even Roopa's >128K is not really a problem, in terms of memory. > >> Maybe what's needed is two thresholds, one for warning and one for >enforcement. >> The warning limit would need to be low enough that the information >had a good chance >> of being logged before the system was under too much load to be able >to convey >> that information. The enforcement limit could be left as default >inactive until >> shown that it needed to be otherwise. > >What exactly is the problem here? Does the DoS exhaust memory, or does >the hashing algorithm not scale? Just a note - when net-next opens I'll send patches which move the fdb to a resizeable hashtable that scales nicely even with hundreds of thousands of entries so only the memory issue will remain. > >It is more work, but the table could be more closely tied to the >memory management code. When memory is getting low, callbacks are made >asking to free up memory. Register such a callback and throw away part >of the table when memory is getting low. There is then no need to >limit the size, but print a rate limited warning when asked to reduce >the size. 
> > Andrew -- Sent from my Android device with K-9 Mail. Please excuse my brevity. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 19:36 ` Nikolay Aleksandrov @ 2017-11-16 20:54 ` Sarah Newman 0 siblings, 0 replies; 27+ messages in thread From: Sarah Newman @ 2017-11-16 20:54 UTC (permalink / raw) To: Nikolay Aleksandrov, Andrew Lunn; +Cc: Willy Tarreau, netdev, roopa On 11/16/2017 11:36 AM, Nikolay Aleksandrov wrote: > On 16 November 2017 21:23:25 EET, Andrew Lunn <andrew@lunn.ch> wrote: >>> Linux bridges can also be used in small embedded devices. With no >> limit, >>> the likely result from those devices being attacked is the device >> gets >>> thrown away for being unreliable. >> >> Hi Sarah >> >> Just to get a gut feeling... >> >> struct net_bridge_fdb_entry is 40 bytes. >> >> My WiFi access point which is also a 5 port bridge, currently has 97MB >> free RAM. That is space for about 2.5M FDB entries. So even Roopa's >> 128K is not really a problem, in terms of memory. The recommendation was a default maximum of ~4B entries rather than 128k or 256k entries. 2.5M entries over a 300 second aging period is ~8.3kpps. >>> Maybe what's needed is two thresholds, one for warning and one for >> enforcement. >>> The warning limit would need to be low enough that the information >> had a good chance >>> of being logged before the system was under too much load to be able >> to convey >>> that information. The enforcement limit could be left as default >> inactive until >>> shown that it needed to be otherwise. >> >> What exactly is the problem here? Does the DoS exhaust memory, or does >> the hashing algorithm not scale? My personal observation was 100% CPU usage, not memory exhaustion. Others have documented memory exhaustion. > > Just a note - when net-next opens I'll send patches > which move the fdb to a resizeable hashtable that scales nicely even with hundreds of thousands of entries so only the memory issue will remain. Thank you. 
I believe that under attack, the number of entries could exceed hundreds of thousands when accumulated over the default aging time. Perhaps it would still make sense to support a hard limit, even if it is quite high by default? > >> >> It is more work, but the table could be more closely tied to the >> memory management code. When memory is getting low, callbacks are made >> asking to free up memory. Register such a callback and throw away part >> of the table when memory is getting low. There is then no need to >> limit the size, but print a rate limited warning when asked to reduce >> the size. That sounds reasonable, though I think it would only trigger in the small embedded devices before CPU usage became an issue. --Sarah ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 19:23 ` Andrew Lunn 2017-11-16 19:36 ` Nikolay Aleksandrov @ 2017-11-16 20:21 ` Vincent Bernat 2017-11-17 0:27 ` Stephen Hemminger 1 sibling, 1 reply; 27+ messages in thread From: Vincent Bernat @ 2017-11-16 20:21 UTC (permalink / raw) To: Andrew Lunn Cc: Sarah Newman, Willy Tarreau, Nikolay Aleksandrov, netdev, roopa ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> : > struct net_bridge_fdb_entry is 40 bytes. > > My WiFi access point which is also a 5 port bridge, currently has 97MB > free RAM. That is space for about 2.5M FDB entries. So even Roopa's > 128K is not really a problem, in terms of memory. I am also interested in Sarah's patch because we can now have bridge with many ports through VXLAN. The FDB can be replicated to an external daemon with BGP and the cost of each additional MAC address is therefore higher than just a few bytes. It seems simpler to implement a limiting policy early (at the port or bridge level). Also, this is a pretty standard limit to have for a bridge (switchport port-security maximum on Cisco, set interface X mac-limit on Juniper). And it's not something easy to do with ebtables. -- Use the good features of a language; avoid the bad ones. - The Elements of Programming Style (Kernighan & Plauger) ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-16 20:21 ` Vincent Bernat @ 2017-11-17 0:27 ` Stephen Hemminger 2017-11-17 5:26 ` Willy Tarreau 0 siblings, 1 reply; 27+ messages in thread From: Stephen Hemminger @ 2017-11-17 0:27 UTC (permalink / raw) To: Vincent Bernat Cc: Andrew Lunn, Sarah Newman, Willy Tarreau, Nikolay Aleksandrov, netdev, roopa On Thu, 16 Nov 2017 21:21:55 +0100 Vincent Bernat <bernat@luffy.cx> wrote: > ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> : > > > struct net_bridge_fdb_entry is 40 bytes. > > > > My WiFi access point which is also a 5 port bridge, currently has 97MB > > free RAM. That is space for about 2.5M FDB entries. So even Roopa's > > 128K is not really a problem, in terms of memory. > > I am also interested in Sarah's patch because we can now have bridge > with many ports through VXLAN. The FDB can be replicated to an external > daemon with BGP and the cost of each additional MAC address is therefore > higher than just a few bytes. It seems simpler to implement a limiting > policy early (at the port or bridge level). > > Also, this is a pretty standard limit to have for a bridge (switchport > port-security maximum on Cisco, set interface X mac-limit on > Juniper). And it's not something easy to do with ebtables. I want an optional limit per port, it makes a lot of sense. If for no other reason that huge hash tables are a performance problems. There is a bigger question about which fdb to evict but just dropping the new one seems to be easiest and as good as any other solution. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-17 0:27 ` Stephen Hemminger @ 2017-11-17 5:26 ` Willy Tarreau 2017-11-17 6:14 ` Nikolay Aleksandrov 2017-11-17 14:06 ` Andrew Lunn 1 sibling, 2 replies; 27+ messages in thread From: Willy Tarreau @ 2017-11-17 5:26 UTC (permalink / raw) To: Stephen Hemminger Cc: Vincent Bernat, Andrew Lunn, Sarah Newman, Nikolay Aleksandrov, netdev, roopa Hi Stephen, On Thu, Nov 16, 2017 at 04:27:18PM -0800, Stephen Hemminger wrote: > On Thu, 16 Nov 2017 21:21:55 +0100 > Vincent Bernat <bernat@luffy.cx> wrote: > > > ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> : > > > > > struct net_bridge_fdb_entry is 40 bytes. > > > > > > My WiFi access point which is also a 5 port bridge, currently has 97MB > > > free RAM. That is space for about 2.5M FDB entries. So even Roopa's > > > 128K is not really a problem, in terms of memory. > > > > I am also interested in Sarah's patch because we can now have bridge > > with many ports through VXLAN. The FDB can be replicated to an external > > daemon with BGP and the cost of each additional MAC address is therefore > > higher than just a few bytes. It seems simpler to implement a limiting > > policy early (at the port or bridge level). > > > > Also, this is a pretty standard limit to have for a bridge (switchport > > port-security maximum on Cisco, set interface X mac-limit on > > Juniper). And it's not something easy to do with ebtables. > > I want an optional limit per port, it makes a lot of sense. > If for no other reason that huge hash tables are a performance problems. Except its not a limit in that it doesn't prevent new traffic from going in, it only prevents new MACs from being learned, so suddenly you start flooding all ports with new traffic once the limit is reached, which is not trivial to detect nor diagnose. > There is a bigger question about which fdb to evict but just dropping the > new one seems to be easiest and as good as any other solution. 
Usually it's better to apply LRU or random here in my opinion, as the new entry is much more likely to be needed than older ones by definition. In terms of CPU usage it may even be better to kill an entire series in the hash table (eg: all nodes in the same table entry for example), as the operation can be almost as cheap and result in not being needed for a while again. Willy ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-17 5:26 ` Willy Tarreau @ 2017-11-17 6:14 ` Nikolay Aleksandrov 2017-11-17 8:01 ` Nikolay Aleksandrov 2017-11-17 14:06 ` Andrew Lunn 1 sibling, 1 reply; 27+ messages in thread From: Nikolay Aleksandrov @ 2017-11-17 6:14 UTC (permalink / raw) To: Willy Tarreau, Stephen Hemminger Cc: Vincent Bernat, Andrew Lunn, Sarah Newman, netdev, roopa On 17/11/17 07:26, Willy Tarreau wrote: > Hi Stephen, > > On Thu, Nov 16, 2017 at 04:27:18PM -0800, Stephen Hemminger wrote: >> On Thu, 16 Nov 2017 21:21:55 +0100 >> Vincent Bernat <bernat@luffy.cx> wrote: >> >>> ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> : >>> >>>> struct net_bridge_fdb_entry is 40 bytes. >>>> >>>> My WiFi access point which is also a 5 port bridge, currently has 97MB >>>> free RAM. That is space for about 2.5M FDB entries. So even Roopa's >>>> 128K is not really a problem, in terms of memory. >>> >>> I am also interested in Sarah's patch because we can now have bridge >>> with many ports through VXLAN. The FDB can be replicated to an external >>> daemon with BGP and the cost of each additional MAC address is therefore >>> higher than just a few bytes. It seems simpler to implement a limiting >>> policy early (at the port or bridge level). >>> >>> Also, this is a pretty standard limit to have for a bridge (switchport >>> port-security maximum on Cisco, set interface X mac-limit on >>> Juniper). And it's not something easy to do with ebtables. >> >> I want an optional limit per port, it makes a lot of sense. >> If for no other reason that huge hash tables are a performance problems. > > Except its not a limit in that it doesn't prevent new traffic from going > in, it only prevents new MACs from being learned, so suddenly you start > flooding all ports with new traffic once the limit is reached, which is > not trivial to detect nor diagnose. 
> >> There is a bigger question about which fdb to evict but just dropping the >> new one seems to be easiest and as good as any other solution. > > Usually it's better to apply LRU or random here in my opinion, as the > new entry is much more likely to be needed than older ones by definition. > In terms of CPU usage it may even be better to kill an entire series in > the hash table (eg: all nodes in the same table entry for example), as > the operation can be almost as cheap and result in not being needed for > a while again. > > Willy > Hi, I have been thinking about this and how to try and keep everyone happy while maintaining performance, so how about this: * Add a per-port fdb LRU list which is used only when there are >= 2000 global fdb entries _or_ a per-port limit is set. If the list is in use, update the fdb list position once per second. I think these properties will allow us to scale and have a better LRU locking granularity (per-port), and in smaller setups (not needing LRU) the cost will be a single test in fast path. * Use the above LRU list for per-port limit evictions * More importantly use the LRU list for fdb expire, removing the need to walk over all fdbs every time we expire entries. This would be of great help for larger setups with many fdbs (it will kick in on >= 2000 fdb entries). * (optional) Make the global LRU kick in limit an option, people might want to minimize traffic blocking due to expire process. Any comments and suggestions are welcome. When we agree on the details I'll do the RFC patches and run some tests before submitting. Defaults will be kept as they are now. I've chosen the 2000 limit arbitrarily and am happy to change it if people have something else in mind. This should play nice with the resizeable hashtable change. Thanks, Nik ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-17 6:14 ` Nikolay Aleksandrov @ 2017-11-17 8:01 ` Nikolay Aleksandrov 0 siblings, 0 replies; 27+ messages in thread From: Nikolay Aleksandrov @ 2017-11-17 8:01 UTC (permalink / raw) To: Willy Tarreau, Stephen Hemminger Cc: Vincent Bernat, Andrew Lunn, Sarah Newman, netdev, roopa On 17/11/17 08:14, Nikolay Aleksandrov wrote: > On 17/11/17 07:26, Willy Tarreau wrote: >> Hi Stephen, >> >> On Thu, Nov 16, 2017 at 04:27:18PM -0800, Stephen Hemminger wrote: >>> On Thu, 16 Nov 2017 21:21:55 +0100 >>> Vincent Bernat <bernat@luffy.cx> wrote: >>> >>>> ❦ 16 novembre 2017 20:23 +0100, Andrew Lunn <andrew@lunn.ch> : >>>> >>>>> struct net_bridge_fdb_entry is 40 bytes. >>>>> >>>>> My WiFi access point which is also a 5 port bridge, currently has 97MB >>>>> free RAM. That is space for about 2.5M FDB entries. So even Roopa's >>>>> 128K is not really a problem, in terms of memory. >>>> >>>> I am also interested in Sarah's patch because we can now have bridge >>>> with many ports through VXLAN. The FDB can be replicated to an external >>>> daemon with BGP and the cost of each additional MAC address is therefore >>>> higher than just a few bytes. It seems simpler to implement a limiting >>>> policy early (at the port or bridge level). >>>> >>>> Also, this is a pretty standard limit to have for a bridge (switchport >>>> port-security maximum on Cisco, set interface X mac-limit on >>>> Juniper). And it's not something easy to do with ebtables. >>> >>> I want an optional limit per port, it makes a lot of sense. >>> If for no other reason that huge hash tables are a performance problems. >> >> Except its not a limit in that it doesn't prevent new traffic from going >> in, it only prevents new MACs from being learned, so suddenly you start >> flooding all ports with new traffic once the limit is reached, which is >> not trivial to detect nor diagnose. 
>> >>> There is a bigger question about which fdb to evict but just dropping the >>> new one seems to be easiest and as good as any other solution. >> >> Usually it's better to apply LRU or random here in my opinion, as the >> new entry is much more likely to be needed than older ones by definition. >> In terms of CPU usage it may even be better to kill an entire series in >> the hash table (eg: all nodes in the same table entry for example), as >> the operation can be almost as cheap and result in not being needed for >> a while again. >> >> Willy >> > > Hi, > I have been thinking about this and how to try and keep everyone happy > while maintaining performance, so how about this: > > * Add a per-port fdb LRU list which is used only when there are >= 2000 > global fdb entries _or_ a per-port limit is set. If the list is in use, > update the fdb list position once per second. I think these properties will > allow us to scale and have a better LRU locking granularity (per-port), and in > smaller setups (not needing LRU) the cost will be a single test in fast path. > It seems I spoke too soon, I just tried a quick and dirty version of this and the per-port LRU becomes a nightmare with the completely lockless fdb dst port changes. So I think going back to the original proposition with new fdb dropping is best for now, otherwise we need to add some synchronization on fdb port move. In general the per-port locks would be tricky to handle with moving fdbs, so even if we add a per-port LRU list we'll need some global lock to handle moves and updates in a simple manner, and I'd like to avoid adding a new bridge lock. > * Use the above LRU list for per-port limit evictions > > * More importantly use the LRU list for fdb expire, removing the need to walk > over all fdbs every time we expire entries. This would be of great help for > larger setups with many fdbs (it will kick in on >= 2000 fdb entries). 
> Will have to drop the above point for now, even though I was mostly interested in that. > * (optional) Make the global LRU kick in limit an option, people might want to > minimize traffic blocking due to expire process. > > Any comments and suggestions are welcome. When we agree on the details I'll do > the RFC patches and run some tests before submitting. Defaults will be kept as > they are now. I've chosen the 2000 limit arbitrarily and am happy to change it > if people have something else in mind. This should play nice with the resizeable > hashtable change. > > Thanks, > Nik > > > > > ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-17 5:26 ` Willy Tarreau 2017-11-17 6:14 ` Nikolay Aleksandrov @ 2017-11-17 14:06 ` Andrew Lunn 2017-11-17 18:44 ` Willy Tarreau 1 sibling, 1 reply; 27+ messages in thread From: Andrew Lunn @ 2017-11-17 14:06 UTC (permalink / raw) To: Willy Tarreau Cc: Stephen Hemminger, Vincent Bernat, Sarah Newman, Nikolay Aleksandrov, netdev, roopa > Usually it's better to apply LRU or random here in my opinion, as the > new entry is much more likely to be needed than older ones by definition. Hi Willy I think this depends on why you need to discard. If it is normal operation and the limits are simply too low, i would agree. If however it is a DoS, throwing away the new entries makes sense, leaving the old ones which are more likely to be useful. Most of the talk in this thread has been about limits for DoS prevention... Andrew ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] net: bridge: add max_fdb_count 2017-11-17 14:06 ` Andrew Lunn @ 2017-11-17 18:44 ` Willy Tarreau 0 siblings, 0 replies; 27+ messages in thread From: Willy Tarreau @ 2017-11-17 18:44 UTC (permalink / raw) To: Andrew Lunn Cc: Stephen Hemminger, Vincent Bernat, Sarah Newman, Nikolay Aleksandrov, netdev, roopa Hi Andrew, On Fri, Nov 17, 2017 at 03:06:23PM +0100, Andrew Lunn wrote: > > Usually it's better to apply LRU or random here in my opinion, as the > > new entry is much more likely to be needed than older ones by definition. > > Hi Willy > > I think this depends on why you need to discard. If it is normal > operation and the limits are simply too low, i would agree. > > If however it is a DoS, throwing away the new entries makes sense, > leaving the old ones which are more likely to be useful. > > Most of the talk in this thread has been about limits for DoS > prevention... Sure but my point is that it can kick in on regular traffic and in this case it can be catastrophic. That's only what bothers me. If we have an unlimited default value with this algorithm I'm fine because nobody will get caught by accident with a bridge suddenly replicating high traffic on all ports because an unknown limit was reached. That's the principle of least surprise. I know that when fighting DoSes there's never any universally good solutions and one has to make tradeoffs. I'm perfectly fine with this. Cheers, Willy ^ permalink raw reply [flat|nested] 27+ messages in thread
* RE: [PATCH] net: bridge: add max_fdb_count 2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman ` (4 preceding siblings ...) 2017-11-16 7:31 ` Nikolay Aleksandrov @ 2017-11-21 14:53 ` David Laight 5 siblings, 0 replies; 27+ messages in thread From: David Laight @ 2017-11-21 14:53 UTC (permalink / raw) To: 'Sarah Newman', netdev@vger.kernel.org From: Sarah Newman > Sent: 15 November 2017 19:27 > Current memory and CPU usage for managing bridge fdb entries is unbounded. > Add a parameter max_fdb_count, controlled from sysfs, which places an upper > limit on the number of entries. Defaults to 1024. > > When max_fdb_count is met or exceeded, whether traffic is sent out a > given port should depend on its flooding behavior. Does it make sense for a bridge to run in a mode where it doesn't remember (all the) MAC addresses from one of its interfaces? Rather than flood unknown addresses they are just sent to the 'everywhere else' interface. David ^ permalink raw reply [flat|nested] 27+ messages in thread
end of thread, other threads:[~2017-11-21 14:53 UTC | newest] Thread overview: 27+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2017-11-15 19:27 [PATCH] net: bridge: add max_fdb_count Sarah Newman 2017-11-15 19:43 ` Sarah Newman 2017-11-15 20:04 ` Stephen Hemminger 2017-11-16 2:25 ` Andrew Lunn 2017-11-16 4:05 ` Toshiaki Makita 2017-11-16 4:54 ` Sarah Newman 2017-11-16 6:13 ` Toshiaki Makita 2017-11-16 6:20 ` Roopa Prabhu 2017-11-16 16:54 ` Stephen Hemminger 2017-11-15 21:34 ` Egil Hjelmeland 2017-11-16 3:01 ` Andrew Lunn 2017-11-16 7:31 ` Nikolay Aleksandrov 2017-11-16 9:20 ` Sarah Newman 2017-11-16 9:49 ` Nikolay Aleksandrov 2017-11-16 9:58 ` Willy Tarreau 2017-11-16 18:23 ` Sarah Newman 2017-11-16 19:23 ` Andrew Lunn 2017-11-16 19:36 ` Nikolay Aleksandrov 2017-11-16 20:54 ` Sarah Newman 2017-11-16 20:21 ` Vincent Bernat 2017-11-17 0:27 ` Stephen Hemminger 2017-11-17 5:26 ` Willy Tarreau 2017-11-17 6:14 ` Nikolay Aleksandrov 2017-11-17 8:01 ` Nikolay Aleksandrov 2017-11-17 14:06 ` Andrew Lunn 2017-11-17 18:44 ` Willy Tarreau 2017-11-21 14:53 ` David Laight
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).