* Re: [PATCH 03/38] mlx4: Convert qp_table_tree to XArray
From: Saeed Mahameed @ 2019-08-27 19:18 UTC (permalink / raw)
To: willy@infradead.org, netdev@vger.kernel.org
In-Reply-To: <20190820223259.22348-4-willy@infradead.org>
On Tue, 2019-08-20 at 15:32 -0700, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>
> This XArray appears to be modifiable from interrupt context, so we
> have
> to be a little more careful with the locking. However, the lookup
> can
> be done without the spinlock held. I cannot determine whether
> mlx4_qp_alloc() is allowed to sleep, so I've retained the GFP_ATOMIC
> there, but it could be turned into GFP_KERNEL if the callers can
> tolerate it sleeping.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 +-
> drivers/net/ethernet/mellanox/mlx4/qp.c | 37 ++++++---------------
> --
> include/linux/mlx4/device.h | 4 +--
> include/linux/mlx4/qp.h | 2 +-
> 4 files changed, 14 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> index b6fe22bee9f4..aaece8480da7 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> @@ -38,7 +38,7 @@
> #define MLX4_H
>
> #include <linux/mutex.h>
> -#include <linux/radix-tree.h>
> +#include <linux/xarray.h>
> #include <linux/rbtree.h>
> #include <linux/timer.h>
> #include <linux/semaphore.h>
> @@ -716,7 +716,6 @@ struct mlx4_qp_table {
> u32 zones_uids[MLX4_QP_TABLE_ZONE_NUM];
> u32 rdmarc_base;
> int rdmarc_shift;
> - spinlock_t lock;
> struct mlx4_icm_table qp_table;
> struct mlx4_icm_table auxc_table;
> struct mlx4_icm_table altc_table;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c
> b/drivers/net/ethernet/mellanox/mlx4/qp.c
> index 427e7a31862c..4659ecec12c1 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/qp.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
> @@ -48,16 +48,13 @@
>
> void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
> {
> - struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
> struct mlx4_qp *qp;
>
> - spin_lock(&qp_table->lock);
> -
> + xa_lock(&dev->qp_table);
> qp = __mlx4_qp_lookup(dev, qpn);
> if (qp)
> refcount_inc(&qp->refcount);
> -
> - spin_unlock(&qp_table->lock);
> + xa_unlock(&dev->qp_table);
>
> if (!qp) {
> mlx4_dbg(dev, "Async event for none existent QP
> %08x\n", qpn);
> @@ -390,21 +387,11 @@ static void mlx4_qp_free_icm(struct mlx4_dev
> *dev, int qpn)
>
> struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
> {
> - struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
> - struct mlx4_qp *qp;
> -
> - spin_lock_irq(&qp_table->lock);
> -
> - qp = __mlx4_qp_lookup(dev, qpn);
> -
> - spin_unlock_irq(&qp_table->lock);
> - return qp;
> + return __mlx4_qp_lookup(dev, qpn);
> }
>
> int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
> {
> - struct mlx4_priv *priv = mlx4_priv(dev);
> - struct mlx4_qp_table *qp_table = &priv->qp_table;
> int err;
>
> if (!qpn)
> @@ -416,10 +403,9 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn,
> struct mlx4_qp *qp)
> if (err)
> return err;
>
> - spin_lock_irq(&qp_table->lock);
> - err = radix_tree_insert(&dev->qp_table_tree, qp->qpn &
> - (dev->caps.num_qps - 1), qp);
> - spin_unlock_irq(&qp_table->lock);
> + err = xa_err(xa_store_irq(&dev->qp_table,
> + qp->qpn & (dev->caps.num_qps - 1),
> + qp, GFP_ATOMIC));
mlx4_qp_alloc() is allowed to sleep, so use GFP_KERNEL here.
> if (err)
> goto err_icm;
>
> @@ -512,12 +498,11 @@ EXPORT_SYMBOL_GPL(mlx4_update_qp);
>
> void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
> {
> - struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
> unsigned long flags;
>
> - spin_lock_irqsave(&qp_table->lock, flags);
> - radix_tree_delete(&dev->qp_table_tree, qp->qpn & (dev-
> >caps.num_qps - 1));
> - spin_unlock_irqrestore(&qp_table->lock, flags);
> + xa_lock_irqsave(&dev->qp_table, flags);
> + __xa_erase(&dev->qp_table, qp->qpn & (dev->caps.num_qps - 1));
> + xa_unlock_irqrestore(&dev->qp_table, flags);
> }
> EXPORT_SYMBOL_GPL(mlx4_qp_remove);
>
> @@ -760,7 +745,6 @@ static void mlx4_cleanup_qp_zones(struct mlx4_dev
> *dev)
>
> int mlx4_init_qp_table(struct mlx4_dev *dev)
> {
> - struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
> int err;
> int reserved_from_top = 0;
> int reserved_from_bot;
> @@ -770,8 +754,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
> u32 max_table_offset = dev->caps.dmfs_high_rate_qpn_base +
> dev->caps.dmfs_high_rate_qpn_range;
>
> - spin_lock_init(&qp_table->lock);
> - INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
> + xa_init_flags(&dev->qp_table, XA_FLAGS_LOCK_IRQ);
> if (mlx4_is_slave(dev))
> return 0;
>
> diff --git a/include/linux/mlx4/device.h
> b/include/linux/mlx4/device.h
> index 36e412c3d657..acffca7d9f00 100644
> --- a/include/linux/mlx4/device.h
> +++ b/include/linux/mlx4/device.h
> @@ -36,7 +36,7 @@
> #include <linux/if_ether.h>
> #include <linux/pci.h>
> #include <linux/completion.h>
> -#include <linux/radix-tree.h>
> +#include <linux/xarray.h>
> #include <linux/cpu_rmap.h>
> #include <linux/crash_dump.h>
>
> @@ -889,7 +889,7 @@ struct mlx4_dev {
> struct mlx4_caps caps;
> struct mlx4_phys_caps phys_caps;
> struct mlx4_quotas quotas;
> - struct radix_tree_root qp_table_tree;
> + struct xarray qp_table;
> u8 rev_id;
> u8 port_random_macs;
> char board_id[MLX4_BOARD_ID_LEN];
> diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
> index 8e2828d48d7f..6c3ec3197a10 100644
> --- a/include/linux/mlx4/qp.h
> +++ b/include/linux/mlx4/qp.h
> @@ -488,7 +488,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct
> mlx4_mtt *mtt,
>
> static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev,
> u32 qpn)
> {
> - return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev-
> >caps.num_qps - 1));
> + return xa_load(&dev->qp_table, qpn & (dev->caps.num_qps - 1));
> }
>
> void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
^ permalink raw reply
* [PATCH v1 2/5] mdev: Make mdev alias unique among all mdevs
From: Parav Pandit @ 2019-08-27 19:16 UTC (permalink / raw)
To: alex.williamson, jiri, kwankhede, cohuck, davem
Cc: kvm, linux-kernel, netdev, Parav Pandit
In-Reply-To: <20190827191654.41161-1-parav@mellanox.com>
Mdev alias should be unique among all the mdevs, so that when such alias
is used by the mdev users to derive other objects, there is no
collision in a given system.
Signed-off-by: Parav Pandit <parav@mellanox.com>
---
Changelog:
v0->v1:
- Fixed inclusion of alias for NULL check
- Added ratelimited debug print for sha1 hash collision error
---
drivers/vfio/mdev/mdev_core.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 62d29f57fe0c..4b9899e40665 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -375,6 +375,13 @@ int mdev_device_create(struct kobject *kobj, struct device *dev,
ret = -EEXIST;
goto mdev_fail;
}
+ if (tmp->alias && alias && strcmp(tmp->alias, alias) == 0) {
+ mutex_unlock(&mdev_list_lock);
+ ret = -EEXIST;
+ dev_dbg_ratelimited(dev, "Hash collision in alias creation for UUID %pUl\n",
+ uuid);
+ goto mdev_fail;
+ }
}
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
--
2.19.2
^ permalink raw reply related
* [PATCH v1 1/5] mdev: Introduce sha1 based mdev alias
From: Parav Pandit @ 2019-08-27 19:16 UTC (permalink / raw)
To: alex.williamson, jiri, kwankhede, cohuck, davem
Cc: kvm, linux-kernel, netdev, Parav Pandit
In-Reply-To: <20190827191654.41161-1-parav@mellanox.com>
Some vendor drivers want an identifier for an mdev device that is
shorter than the UUID, due to length restrictions in the consumers of
that identifier.
Add a callback that allows a vendor driver to request an alias of a
specified length to be generated for an mdev device. If generated,
that alias is checked for collisions.
It is an optional attribute.
mdev alias is generated using sha1 from the mdev name.
Signed-off-by: Parav Pandit <parav@mellanox.com>
---
Changelog:
v0->v1:
- Moved alias length check outside of the parent lock
- Moved alias and digest allocation from kvzalloc to kzalloc
- &alias[0] changed to alias
- alias_length check is nested under get_alias_length callback check
- Changed comments to start with an empty line
- Fixed cleanup of hash if mdev_bus_register() fails
- Added comment where alias memory ownership is handed over to mdev device
- Updated commit log to indicate motivation for this feature
---
drivers/vfio/mdev/mdev_core.c | 110 ++++++++++++++++++++++++++++++-
drivers/vfio/mdev/mdev_private.h | 5 +-
drivers/vfio/mdev/mdev_sysfs.c | 13 ++--
include/linux/mdev.h | 4 ++
4 files changed, 122 insertions(+), 10 deletions(-)
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index b558d4cfd082..62d29f57fe0c 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -10,9 +10,11 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/uuid.h>
#include <linux/sysfs.h>
#include <linux/mdev.h>
+#include <crypto/hash.h>
#include "mdev_private.h"
@@ -27,6 +29,8 @@ static struct class_compat *mdev_bus_compat_class;
static LIST_HEAD(mdev_list);
static DEFINE_MUTEX(mdev_list_lock);
+static struct crypto_shash *alias_hash;
+
struct device *mdev_parent_dev(struct mdev_device *mdev)
{
return mdev->parent->dev;
@@ -150,6 +154,16 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
return -EINVAL;
+ if (ops->get_alias_length) {
+ unsigned int digest_size;
+ unsigned int aligned_len;
+
+ aligned_len = roundup(ops->get_alias_length(), 2);
+ digest_size = crypto_shash_digestsize(alias_hash);
+ if (aligned_len / 2 > digest_size)
+ return -EINVAL;
+ }
+
dev = get_device(dev);
if (!dev)
return -EINVAL;
@@ -259,6 +273,7 @@ static void mdev_device_free(struct mdev_device *mdev)
mutex_unlock(&mdev_list_lock);
dev_dbg(&mdev->dev, "MDEV: destroying\n");
+ kfree(mdev->alias);
kfree(mdev);
}
@@ -269,18 +284,88 @@ static void mdev_device_release(struct device *dev)
mdev_device_free(mdev);
}
-int mdev_device_create(struct kobject *kobj,
- struct device *dev, const guid_t *uuid)
+static const char *
+generate_alias(const char *uuid, unsigned int max_alias_len)
+{
+ struct shash_desc *hash_desc;
+ unsigned int digest_size;
+ unsigned char *digest;
+ unsigned int alias_len;
+ char *alias;
+ int ret = 0;
+
+ /*
+ * Align to multiple of 2 as bin2hex will generate
+ * even number of bytes.
+ */
+ alias_len = roundup(max_alias_len, 2);
+ alias = kzalloc(alias_len + 1, GFP_KERNEL);
+ if (!alias)
+ return NULL;
+
+ /* Allocate and init descriptor */
+ hash_desc = kvzalloc(sizeof(*hash_desc) +
+ crypto_shash_descsize(alias_hash),
+ GFP_KERNEL);
+ if (!hash_desc)
+ goto desc_err;
+
+ hash_desc->tfm = alias_hash;
+
+ digest_size = crypto_shash_digestsize(alias_hash);
+
+ digest = kzalloc(digest_size, GFP_KERNEL);
+ if (!digest) {
+ ret = -ENOMEM;
+ goto digest_err;
+ }
+ crypto_shash_init(hash_desc);
+ crypto_shash_update(hash_desc, uuid, UUID_STRING_LEN);
+ crypto_shash_final(hash_desc, digest);
+ bin2hex(alias, digest, min_t(unsigned int, digest_size, alias_len / 2));
+ /*
+ * When alias length is odd, zero out and additional last byte
+ * that bin2hex has copied.
+ */
+ if (max_alias_len % 2)
+ alias[max_alias_len] = 0;
+
+ kfree(digest);
+ kvfree(hash_desc);
+ return alias;
+
+digest_err:
+ kvfree(hash_desc);
+desc_err:
+ kfree(alias);
+ return NULL;
+}
+
+int mdev_device_create(struct kobject *kobj, struct device *dev,
+ const char *uuid_str, const guid_t *uuid)
{
int ret;
struct mdev_device *mdev, *tmp;
struct mdev_parent *parent;
struct mdev_type *type = to_mdev_type(kobj);
+ const char *alias = NULL;
parent = mdev_get_parent(type->parent);
if (!parent)
return -EINVAL;
+ if (parent->ops->get_alias_length) {
+ unsigned int alias_len;
+
+ alias_len = parent->ops->get_alias_length();
+ if (alias_len) {
+ alias = generate_alias(uuid_str, alias_len);
+ if (!alias) {
+ ret = -ENOMEM;
+ goto alias_fail;
+ }
+ }
+ }
mutex_lock(&mdev_list_lock);
/* Check for duplicate */
@@ -300,6 +385,12 @@ int mdev_device_create(struct kobject *kobj,
}
guid_copy(&mdev->uuid, uuid);
+ mdev->alias = alias;
+ /*
+ * At this point alias memory is owned by the mdev.
+ * Mark it NULL, so that only mdev can free it.
+ */
+ alias = NULL;
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
@@ -346,6 +437,8 @@ int mdev_device_create(struct kobject *kobj,
up_read(&parent->unreg_sem);
put_device(&mdev->dev);
mdev_fail:
+ kfree(alias);
+alias_fail:
mdev_put_parent(parent);
return ret;
}
@@ -406,7 +499,17 @@ EXPORT_SYMBOL(mdev_get_iommu_device);
static int __init mdev_init(void)
{
- return mdev_bus_register();
+ int ret;
+
+ alias_hash = crypto_alloc_shash("sha1", 0, 0);
+ if (!alias_hash)
+ return -ENOMEM;
+
+ ret = mdev_bus_register();
+ if (ret)
+ crypto_free_shash(alias_hash);
+
+ return ret;
}
static void __exit mdev_exit(void)
@@ -415,6 +518,7 @@ static void __exit mdev_exit(void)
class_compat_unregister(mdev_bus_compat_class);
mdev_bus_unregister();
+ crypto_free_shash(alias_hash);
}
module_init(mdev_init)
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index 7d922950caaf..cf1c0d9842c6 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -33,6 +33,7 @@ struct mdev_device {
struct kobject *type_kobj;
struct device *iommu_device;
bool active;
+ const char *alias;
};
#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev)
@@ -57,8 +58,8 @@ void parent_remove_sysfs_files(struct mdev_parent *parent);
int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type);
void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
-int mdev_device_create(struct kobject *kobj,
- struct device *dev, const guid_t *uuid);
+int mdev_device_create(struct kobject *kobj, struct device *dev,
+ const char *uuid_str, const guid_t *uuid);
int mdev_device_remove(struct device *dev);
#endif /* MDEV_PRIVATE_H */
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index 7570c7602ab4..43afe0e80b76 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -63,15 +63,18 @@ static ssize_t create_store(struct kobject *kobj, struct device *dev,
return -ENOMEM;
ret = guid_parse(str, &uuid);
- kfree(str);
if (ret)
- return ret;
+ goto err;
- ret = mdev_device_create(kobj, dev, &uuid);
+ ret = mdev_device_create(kobj, dev, str, &uuid);
if (ret)
- return ret;
+ goto err;
- return count;
+ ret = count;
+
+err:
+ kfree(str);
+ return ret;
}
MDEV_TYPE_ATTR_WO(create);
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 0ce30ca78db0..f036fe9854ee 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -72,6 +72,9 @@ struct device *mdev_get_iommu_device(struct device *dev);
* @mmap: mmap callback
* @mdev: mediated device structure
* @vma: vma structure
+ * @get_alias_length: Generate alias for the mdevs of this parent based on the
+ * mdev device name when it returns non zero alias length.
+ * It is optional.
* Parent device that support mediated device should be registered with mdev
* module with mdev_parent_ops structure.
**/
@@ -92,6 +95,7 @@ struct mdev_parent_ops {
long (*ioctl)(struct mdev_device *mdev, unsigned int cmd,
unsigned long arg);
int (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma);
+ unsigned int (*get_alias_length)(void);
};
/* interface for exporting mdev supported type attributes */
--
2.19.2
^ permalink raw reply related
* [PATCH v1 5/5] mtty: Optionally support mtty alias
From: Parav Pandit @ 2019-08-27 19:16 UTC (permalink / raw)
To: alex.williamson, jiri, kwankhede, cohuck, davem
Cc: kvm, linux-kernel, netdev, Parav Pandit
In-Reply-To: <20190827191654.41161-1-parav@mellanox.com>
Provide a module parameter to set alias length to optionally generate
mdev alias.
Example to request mdev alias.
$ modprobe mtty alias_length=12
Signed-off-by: Parav Pandit <parav@mellanox.com>
---
samples/vfio-mdev/mtty.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 92e770a06ea2..92208245b057 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -1410,6 +1410,15 @@ static struct attribute_group *mdev_type_groups[] = {
NULL,
};
+static unsigned int mtty_alias_length;
+module_param_named(alias_length, mtty_alias_length, uint, 0444);
+MODULE_PARM_DESC(alias_length, "mdev alias length; default=0");
+
+static unsigned int mtty_get_alias_length(void)
+{
+ return mtty_alias_length;
+}
+
static const struct mdev_parent_ops mdev_fops = {
.owner = THIS_MODULE,
.dev_attr_groups = mtty_dev_groups,
@@ -1422,6 +1431,7 @@ static const struct mdev_parent_ops mdev_fops = {
.read = mtty_read,
.write = mtty_write,
.ioctl = mtty_ioctl,
+ .get_alias_length = mtty_get_alias_length
};
static void mtty_device_release(struct device *dev)
--
2.19.2
^ permalink raw reply related
* [PATCH v1 4/5] mdev: Update sysfs documentation
From: Parav Pandit @ 2019-08-27 19:16 UTC (permalink / raw)
To: alex.williamson, jiri, kwankhede, cohuck, davem
Cc: kvm, linux-kernel, netdev, Parav Pandit
In-Reply-To: <20190827191654.41161-1-parav@mellanox.com>
Updated documentation for optional read only sysfs attribute.
Signed-off-by: Parav Pandit <parav@mellanox.com>
---
Documentation/driver-api/vfio-mediated-device.rst | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst
index 25eb7d5b834b..0ab03d3f5629 100644
--- a/Documentation/driver-api/vfio-mediated-device.rst
+++ b/Documentation/driver-api/vfio-mediated-device.rst
@@ -270,6 +270,7 @@ Directories and Files Under the sysfs for Each mdev Device
|--- remove
|--- mdev_type {link to its type}
|--- vendor-specific-attributes [optional]
+ |--- alias [optional]
* remove (write only)
@@ -281,6 +282,10 @@ Example::
# echo 1 > /sys/bus/mdev/devices/$mdev_UUID/remove
+* alias (read only)
+Whenever a parent requested to generate an alias, each mdev is assigned a unique
+alias by the mdev core. This file shows the alias of the mdev device.
+
Mediated device Hot plug
------------------------
--
2.19.2
^ permalink raw reply related
* [PATCH v1 3/5] mdev: Expose mdev alias in sysfs tree
From: Parav Pandit @ 2019-08-27 19:16 UTC (permalink / raw)
To: alex.williamson, jiri, kwankhede, cohuck, davem
Cc: kvm, linux-kernel, netdev, Parav Pandit
In-Reply-To: <20190827191654.41161-1-parav@mellanox.com>
Expose the optional alias for an mdev device as a sysfs attribute.
This way, userspace tools such as udev may make use of the alias, for
example to create a netdevice name for the mdev.
Signed-off-by: Parav Pandit <parav@mellanox.com>
---
Changelog:
v0->v1:
- Addressed comments from Cornelia Huck
- Updated commit description
---
drivers/vfio/mdev/mdev_sysfs.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index 43afe0e80b76..59f4e3cc5233 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -246,7 +246,20 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR_WO(remove);
+static ssize_t alias_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct mdev_device *dev = mdev_from_dev(device);
+
+ if (!dev->alias)
+ return -EOPNOTSUPP;
+
+ return sprintf(buf, "%s\n", dev->alias);
+}
+static DEVICE_ATTR_RO(alias);
+
static const struct attribute *mdev_device_attrs[] = {
+ &dev_attr_alias.attr,
&dev_attr_remove.attr,
NULL,
};
--
2.19.2
^ permalink raw reply related
* [PATCH v1 0/5] Introduce variable length mdev alias
From: Parav Pandit @ 2019-08-27 19:16 UTC (permalink / raw)
To: alex.williamson, jiri, kwankhede, cohuck, davem
Cc: kvm, linux-kernel, netdev, Parav Pandit
In-Reply-To: <20190826204119.54386-1-parav@mellanox.com>
To have consistent naming for the netdevice of a mdev and to have
consistent naming of the devlink port [1] of a mdev, which is formed using
phys_port_name of the devlink port, current UUID is not usable because
UUID is too long.
UUID in string format is 36-characters long and in binary 128-bit.
Both formats are not able to fit within 15 characters limit of netdev
name.
It is desired to have mdev device naming consistent using UUID.
So that widely used user space framework such as ovs [2] can make use
of mdev representor in similar way as PCIe SR-IOV VF and PF representors.
Hence,
(a) mdev alias is created which is derived using sha1 from the mdev name.
(b) Vendor driver describes how long an alias should be for the child mdev
created for a given parent.
(c) Mdev aliases are unique at system level.
(d) An alias is created optionally, only when the parent requests one.
This ensures that non networking mdev parents can function without alias
creation overhead.
This design is discussed at [3].
An example systemd/udev extension will have,
1. netdev name created using mdev alias available in sysfs.
mdev UUID=83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
mdev 12 character alias=cd5b146a80a5
netdev name of this mdev = enmcd5b146a80a5
Here en = Ethernet link
m = mediated device
2. devlink port phys_port_name created using mdev alias.
devlink phys_port_name=pcd5b146a80a5
This patchset enables mdev core to maintain unique alias for a mdev.
Patch-1 Introduces mdev alias using sha1.
Patch-2 Ensures that mdev alias is unique in a system.
Patch-3 Exposes mdev alias in a sysfs hierarchy.
Patch-4 Updates the sysfs documentation for the new alias attribute.
Patch-5 Extends mtty driver to optionally provide alias generation.
This also makes it possible to test UUID based sha1 collisions and
trigger error handling for duplicate sha1 results.
In future when networking driver wants to use mdev alias, mdev_alias()
API will be added to derive devlink port name.
[1] http://man7.org/linux/man-pages/man8/devlink-port.8.html
[2] https://docs.openstack.org/os-vif/latest/user/plugins/ovs.html
[3] https://patchwork.kernel.org/cover/11084231/
---
Changelog:
v0->v1:
- Addressed comments from Alex Williamson, Cornelia Huck and Mark Bloch
- Moved alias length check outside of the parent lock
- Moved alias and digest allocation from kvzalloc to kzalloc
- &alias[0] changed to alias
- alias_length check is nested under get_alias_length callback check
- Changed comments to start with an empty line
- Added comment where alias memory ownership is handed over to mdev device
- Fixed cleanup of hash if mdev_bus_register() fails
- Updated documentation for new sysfs alias file
- Improved commit logs to make description more clear
- Fixed inclusion of alias for NULL check
- Added ratelimited debug print for sha1 hash collision error
Parav Pandit (5):
mdev: Introduce sha1 based mdev alias
mdev: Make mdev alias unique among all mdevs
mdev: Expose mdev alias in sysfs tree
mdev: Update sysfs documentation
mtty: Optionally support mtty alias
.../driver-api/vfio-mediated-device.rst | 5 +
drivers/vfio/mdev/mdev_core.c | 117 +++++++++++++++++-
drivers/vfio/mdev/mdev_private.h | 5 +-
drivers/vfio/mdev/mdev_sysfs.c | 26 +++-
include/linux/mdev.h | 4 +
samples/vfio-mdev/mtty.c | 10 ++
6 files changed, 157 insertions(+), 10 deletions(-)
--
2.19.2
^ permalink raw reply
* [PATCH net] tcp: inherit timestamp on mtu probe
From: Willem de Bruijn @ 2019-08-27 19:09 UTC (permalink / raw)
To: netdev; +Cc: davem, edumazet, jakub.kicinski, Willem de Bruijn
From: Willem de Bruijn <willemb@google.com>
TCP associates tx timestamp requests with a byte in the bytestream.
If merging skbs in tcp_mtu_probe, migrate the tstamp request.
Similar to MSG_EOR, do not allow moving a timestamp from any segment
in the probe but the last. This is to avoid merging multiple timestamps.
Tested with the packetdrill script at
https://github.com/wdebruij/packetdrill/commits/mtu_probe-1
Link: http://patchwork.ozlabs.org/patch/1143278/#2232897
Fixes: 4ed2d765dfac ("net-timestamp: TCP timestamping")
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
net/ipv4/tcp_output.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5c46bc4c7e8d..42abc9bd687a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2053,7 +2053,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
if (len <= skb->len)
break;
- if (unlikely(TCP_SKB_CB(skb)->eor))
+ if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
return false;
len -= skb->len;
@@ -2170,6 +2170,7 @@ static int tcp_mtu_probe(struct sock *sk)
* we need to propagate it to the new skb.
*/
TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
+ tcp_skb_collapse_tstamp(nskb, skb);
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
} else {
--
2.23.0.187.g17f5b7556c-goog
^ permalink raw reply related
* Re: [PATCH] net: intel: Cleanup e1000 - add space between }}
From: Joe Perches @ 2019-08-27 19:07 UTC (permalink / raw)
To: jeffrey.t.kirsher, Forrest Fleming, Andrew Morton
Cc: David S. Miller, intel-wired-lan, netdev, linux-kernel
In-Reply-To: <c40b4043424055fc4dae97771bb46c8ab15c6230.camel@intel.com>
On Tue, 2019-08-27 at 12:02 -0700, Jeff Kirsher wrote:
> On Mon, 2019-08-26 at 20:41 -0700, Joe Perches wrote:
> > On Mon, 2019-08-26 at 01:03 -0700, Jeff Kirsher wrote:
> > > On Fri, 2019-08-23 at 19:14 +0000, Forrest Fleming wrote:
> > > > suggested by checkpatch
> > > >
> > > > Signed-off-by: Forrest Fleming <ffleming@gmail.com>
> > > > ---
> > > > .../net/ethernet/intel/e1000/e1000_param.c | 28 +++++++++--
> > > > --------
> > > > 1 file changed, 14 insertions(+), 14 deletions(-)
> > >
> > > While I do not see an issue with this change, I wonder how
> > > important it is
> > > to make such a change. Especially since most of the hardware
> > > supported by
> > > this driver is not available for testing. In addition, this is one
> > > suggested change by checkpatch.pl that I personally do not agree
> > > with.
> >
> > I think checkpatch should allow consecutive }}.
>
> Agreed, have you already submitted a formal patch Joe with the
> suggested change below?
No.
> If so, I will ACK it.
Of course you can add an Acked-by:
> > Maybe:
> > ---
> > diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> > index 287fe73688f0..ac5e0f06e1af 100755
> > --- a/scripts/checkpatch.pl
> > +++ b/scripts/checkpatch.pl
> > @@ -4687,7 +4687,7 @@ sub process {
> >
> > # closing brace should have a space following it when it has
> > anything
> > # on the line
> > - if ($line =~ /}(?!(?:,|;|\)))\S/) {
> > + if ($line =~ /}(?!(?:,|;|\)|\}))\S/) {
> > if (ERROR("SPACING",
> > "space required after that close
> > brace '}'\n" . $herecurr) &&
> > $fix) {
> >
> >
^ permalink raw reply
* [PATCH v3 1/1] netfilter: nf_tables: fib: Drop IPV6 packets if IPv6 is disabled on boot
From: Leonardo Bras @ 2019-08-27 18:57 UTC (permalink / raw)
To: netfilter-devel, coreteam, netdev, linux-kernel
Cc: Leonardo Bras, Pablo Neira Ayuso, Jozsef Kadlecsik,
Florian Westphal, David S. Miller, Alexey Kuznetsov,
Hideaki YOSHIFUJI
If IPv6 is disabled on boot (ipv6.disable=1), but nft_fib_inet ends up
dealing with an IPv6 packet, it causes a kernel panic in
fib6_node_lookup_1(), crashing in bad_page_fault.
The panic is caused by trying to dereference a very low address (0x38
in ppc64le), due to ipv6.fib6_main_tbl = NULL.
BUG: Kernel NULL pointer dereference at 0x00000038
Fix this behavior by dropping IPv6 packets if !ipv6_mod_enabled().
Signed-off-by: Leonardo Bras <leonardo@linux.ibm.com>
---
Changes from v2:
- Replace verdict.code from NF_DROP to NFT_BREAK
- Updated commit message (s/package/packet)
Changes from v1:
- Move drop logic from nft_fib_inet_eval() to nft_fib6_eval{,_type}
so it can affect other usages of these functions.
net/ipv6/netfilter/nft_fib_ipv6.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 7ece86afd079..8496e43b73bd 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -125,6 +125,11 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
u32 *dest = ®s->data[priv->dreg];
struct ipv6hdr *iph, _iph;
+ if (!ipv6_mod_enabled()) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
if (!iph) {
regs->verdict.code = NFT_BREAK;
@@ -150,6 +155,11 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct rt6_info *rt;
int lookup_flags;
+ if (!ipv6_mod_enabled()) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
if (priv->flags & NFTA_FIB_F_IIF)
oif = nft_in(pkt);
else if (priv->flags & NFTA_FIB_F_OIF)
--
2.20.1
^ permalink raw reply related
* Re: [PATCH] net: intel: Cleanup e1000 - add space between }}
From: Jeff Kirsher @ 2019-08-27 19:02 UTC (permalink / raw)
To: Joe Perches, Forrest Fleming, Andrew Morton
Cc: David S. Miller, intel-wired-lan, netdev, linux-kernel
In-Reply-To: <877726fc009ee5ffde50e589d332db90c9695f06.camel@perches.com>
[-- Attachment #1: Type: text/plain, Size: 1477 bytes --]
On Mon, 2019-08-26 at 20:41 -0700, Joe Perches wrote:
> On Mon, 2019-08-26 at 01:03 -0700, Jeff Kirsher wrote:
> > On Fri, 2019-08-23 at 19:14 +0000, Forrest Fleming wrote:
> > > suggested by checkpatch
> > >
> > > Signed-off-by: Forrest Fleming <ffleming@gmail.com>
> > > ---
> > > .../net/ethernet/intel/e1000/e1000_param.c | 28 +++++++++--
> > > --------
> > > 1 file changed, 14 insertions(+), 14 deletions(-)
> >
> > While I do not see an issue with this change, I wonder how
> > important it is
> > to make such a change. Especially since most of the hardware
> > supported by
> > this driver is not available for testing. In addition, this is one
> > suggested change by checkpatch.pl that I personally do not agree
> > with.
>
> I think checkpatch should allow consecutive }}.
Agreed, have you already submitted a formal patch Joe with the
suggested change below? If so, I will ACK it.
>
> Maybe:
> ---
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index 287fe73688f0..ac5e0f06e1af 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -4687,7 +4687,7 @@ sub process {
>
> # closing brace should have a space following it when it has
> anything
> # on the line
> - if ($line =~ /}(?!(?:,|;|\)))\S/) {
> + if ($line =~ /}(?!(?:,|;|\)|\}))\S/) {
> if (ERROR("SPACING",
> "space required after that close
> brace '}'\n" . $herecurr) &&
> $fix) {
>
>
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [PATCH] net: Adding parameter detection in __ethtool_get_link_ksettings.
From: Saeed Mahameed @ 2019-08-27 19:01 UTC (permalink / raw)
To: liudongxu3@huawei.com, eric.dumazet@gmail.com
Cc: davem@davemloft.net, linux-kernel@vger.kernel.org,
netdev@vger.kernel.org
In-Reply-To: <20190826094705.10544-1-liudongxu3@huawei.com>
On Mon, 2019-08-26 at 17:47 +0800, Dongxu Liu wrote:
> > On 8/26/19 9:23 AM, Dongxu Liu wrote:
> > The __ethtool_get_link_ksettings symbol will be exported,
> > and external users may use an illegal address.
> > We should check the parameters before using them,
> > otherwise the system will crash.
> >
> > [ 8980.991134] BUG: unable to handle kernel NULL pointer
> > dereference at (null)
> > [ 8980.993049] IP: [<ffffffff8155aca7>]
> > __ethtool_get_link_ksettings+0x27/0x140
> > [ 8980.994285] PGD 0
> > [ 8980.995013] Oops: 0000 [#1] SMP
> > [ 8980.995896] Modules linked in: sch_ingress ...
> > [ 8981.013220] CPU: 3 PID: 25174 Comm: kworker/3:3 Tainted:
> > G O ----V------- 3.10.0-327.36.58.4.x86_64 #1
> > [ 8981.017667] Workqueue: events linkwatch_event
> > [ 8981.018652] task: ffff8800a8348000 ti: ffff8800b045c000 task.ti:
> > ffff8800b045c000
> > [ 8981.020418] RIP: 0010:[<ffffffff8155aca7>] [<ffffffff8155aca7>]
> > __ethtool_get_link_ksettings+0x27/0x140
> > [ 8981.022383] RSP: 0018:ffff8800b045fc88 EFLAGS: 00010202
> > [ 8981.023453] RAX: 0000000000000000 RBX: ffff8800b045fcac RCX:
> > 0000000000000000
> > [ 8981.024726] RDX: ffff8800b658f600 RSI: ffff8800b045fcac RDI:
> > ffff8802296e0000
> > [ 8981.026000] RBP: ffff8800b045fc98 R08: 0000000000000000 R09:
> > 0000000000000001
> > [ 8981.027273] R10: 00000000000073e0 R11: 0000082b0cc8adea R12:
> > ffff8802296e0000
> > [ 8981.028561] R13: ffff8800b566e8c0 R14: ffff8800b658f600 R15:
> > ffff8800b566e000
> > [ 8981.029841] FS: 0000000000000000(0000)
> > GS:ffff88023ed80000(0000) knlGS:0000000000000000
> > [ 8981.031715] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 8981.032845] CR2: 0000000000000000 CR3: 00000000b39a9000 CR4:
> > 00000000003407e0
> > [ 8981.034137] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
> > 0000000000000000
> > [ 8981.035427] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
> > 0000000000000400
> > [ 8981.036702] Stack:
> > [ 8981.037406] ffff8800b658f600 0000000000009c40 ffff8800b045fce8
> > ffffffffa047a71d
> > [ 8981.039238] 000000000000004d ffff8800b045fcc8 ffff8800b045fd28
> > ffffffff815cb198
> > [ 8981.041070] ffff8800b045fcd8 ffffffff810807e6 00000000e8212951
> > 0000000000000001
> > [ 8981.042910] Call Trace:
> > [ 8981.043660] [<ffffffffa047a71d>]
> > bond_update_speed_duplex+0x3d/0x90 [bonding]
> > [ 8981.045424] [<ffffffff815cb198>] ? inetdev_event+0x38/0x530
> > [ 8981.046554] [<ffffffff810807e6>] ? put_online_cpus+0x56/0x80
> > [ 8981.047688] [<ffffffffa0480d67>] bond_netdev_event+0x137/0x360
> > [bonding]
> > ...
> >
> > Signed-off-by: Dongxu Liu <liudongxu3@huawei.com>
> > ---
> > net/core/ethtool.c | 2 ++
> > 1 file changed, 2 insertions(+)
> >
> > diff --git a/net/core/ethtool.c b/net/core/ethtool.c index
> > 6288e69..9a50b64 100644
> > --- a/net/core/ethtool.c
> > +++ b/net/core/ethtool.c
> > @@ -545,6 +545,8 @@ int __ethtool_get_link_ksettings(struct
> > net_device
> > *dev, {
> > ASSERT_RTNL();
> >
> > + if (!dev || !dev->ethtool_ops)
> > + return -EOPNOTSUPP;
> > I do not believe dev can possibly be NULL at this point.
> > if (!dev->ethtool_ops->get_link_ksettings)
> > return -EOPNOTSUPP;
> >
> >
> > I tried to find an appropriate Fixes: tag.
> > It seems this particular bug was added either by
> > Fixes: 9856909c2abb ("net: bonding: use __ethtool_get_ksettings")
> > or generically in :
> > Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS
> > API")
>
> In fact, "dev->ethtool_ops" is a null pointer in my environment.
> I didn't get the case where "dev" is a null pointer.
dev can't be a null pointer since bond driver guarantees that
and there is a check for the case where it could be null in
bond_slave_netdev_event.
You can drop the "!dev" check, since it should also be the caller's
responsibility and we should avoid cluttering the net core code with
such redundant checks.
> Maybe "if (!dev->ethtool_ops)" is more accurate for this bug.
>
Also, I am not sure about this; it could be a bug in the device driver you're
enslaving.
alloc_netdev_mqs will assign &default_ethtool_ops to dev->ethtool_ops
if the user-provided setup callback didn't assign the driver-specific
ethtool_ops.
So the device driver must be doing something wrong, maybe overwriting the default
ethtool_ops with a NULL pointer? And why?
> I found this bug in version 3.10, the function name was
> __ethtool_get_settings.
> After 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS
> API"),
> This function evolved into __ethtool_get_link_ksettings.
>
^ permalink raw reply
* Re: [PATCH spi for-5.4 2/5] spi: Add a PTP system timestamp to the transfer structure
From: Mark Brown @ 2019-08-27 19:01 UTC (permalink / raw)
To: Vladimir Oltean
Cc: Hubert Feurstein, Miroslav Lichvar, Richard Cochran, Andrew Lunn,
Florian Fainelli, linux-spi, netdev
In-Reply-To: <CA+h21hrwJi1ftJn56RrfobdkcCpsKZGy1VV1+ANWpxoKxwRmwA@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 3542 bytes --]
On Sat, Aug 24, 2019 at 03:38:16PM +0300, Vladimir Oltean wrote:
> On Thu, 22 Aug 2019 at 21:19, Mark Brown <broonie@kernel.org> wrote:
> > On Sun, Aug 18, 2019 at 09:25:57PM +0300, Vladimir Oltean wrote:
> > > + if (!ctlr->ptp_sts_supported) {
> > > + list_for_each_entry(xfer, &mesg->transfers, transfer_list) {
> > > + xfer->ptp_sts_word_pre = 0;
> > > + ptp_read_system_prets(xfer->ptp_sts);
> > > + }
> > > + }
> > We can do better than this for controllers which use transfer_one().
> You mean I should guard this "if", and the one below, with "&&
> !ctlr->transfer_one"?
Yes, that'd make it a bit more obvious that the better handling
is there.
> > > + * @ptp_sts_supported: If the driver sets this to true, it must provide a
> > > + * time snapshot in @spi_transfer->ptp_sts as close as possible to the
> > > + * moment in time when @spi_transfer->ptp_sts_word_pre and
> > > + * @spi_transfer->ptp_sts_word_post were transmitted.
> > > + * If the driver does not set this, the SPI core takes the snapshot as
> > > + * close to the driver hand-over as possible.
> > A couple of issues here. The big one is that for PIO transfers
> > this is going to either complicate the code or introduce overhead
> > in individual drivers for an extremely niche use case. I guess
> > most drivers won't implement it which makes this a bit moot but
> > then this is a concern that pushes back against the idea of
> > implementing the feature.
> The concern is the overhead in terms of code, or runtime performance?
Both, yes.
> Arguably the applications that require deterministic latency are
> actually going to push for overall less overhead at runtime, even if
> that comes at a cost in terms of code size. The spi-fsl-dspi driver
> does not perform worse by any metric after this rework.
Deterministic and fast are often not the same thing here;
sometimes it's better not to optimize if the optimization only
works some of the time, for example.
> > The other is that it's not 100% clear what you're looking to
> > timestamp here - is it when the data goes on the wire, is it when
> > the data goes on the FIFO (which could be relatively large)? I'm
> > guessing you're looking for the physical transfer here, if that's
> > the case should there be some effort to compensate for the delays
> > in the controller?
> The goal is to timestamp the moment when the SPI slave sees word N of
> the data. Luckily the DSPI driver raises the TCF (Transfer Complete
> Flag) once that word has been transmitted, which I used to my
> advantage. The EOQ mode behaves similarly, but has a granularity of 4
> words. The controller delays are hence implicitly included in the
> software timestamp.
The documentation should be clear on that, it'd be very natural
for someone to timestamp on entry to the FIFO.
> But the question is valid and I expect that such compensation might be
> needed for some hardware, provided that it can be measured and
> guaranteed. In fact Hubert did add such logic to the v3 of his MDIO
> patch: https://lkml.org/lkml/2019/8/20/195 There were some objections
> mainly related to the certainty of those offset corrections. I don't
> want to "future-proof" the API now with features I have no use of, but
> such compensation logic might come in the future.
I think it's mainly important that people know what the
expectations are so different drivers are consistent in how they
work, as you say the API can always be extended later.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* Re: [PATCH v2 1/1] netfilter: nf_tables: fib: Drop IPV6 packages if IPv6 is disabled on boot
From: Leonardo Bras @ 2019-08-27 18:55 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: netfilter-devel, coreteam, netdev, linux-kernel, Jozsef Kadlecsik,
Florian Westphal, David S. Miller, Alexey Kuznetsov,
Hideaki YOSHIFUJI
In-Reply-To: <20190827185111.cgutfqkqwsufe2nl@salvia>
[-- Attachment #1: Type: text/plain, Size: 384 bytes --]
On Tue, 2019-08-27 at 20:51 +0200, Pablo Neira Ayuso wrote:
> > > The drop case at the bottom of the fib eval function never actually
> > > never happens.
> >
> > Which one do you mean?
>
> Line 31 of net/netfilter/nft_fib_inet.c.
Oh, yeah, I was thinking about that when I wrote the patch.
Thanks for explaining :)
I will send the v3 in a few minutes.
Best regards,
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [PATCH v5 net-next 01/18] ionic: Add basic framework for IONIC Network device driver
From: Shannon Nelson @ 2019-08-27 18:55 UTC (permalink / raw)
To: Jakub Kicinski; +Cc: netdev, davem
In-Reply-To: <20190826210615.6ce3631e@cakuba.netronome.com>
On 8/26/19 9:06 PM, Jakub Kicinski wrote:
> On Mon, 26 Aug 2019 14:33:22 -0700, Shannon Nelson wrote:
>> +struct ionic {
>> + struct pci_dev *pdev;
>> + struct device *dev;
>> + struct devlink *dl;
> No need for the dl pointer here. priv_to_devlink can be used to obtain
> the devlink pointer based on priv structure address.
>
>> +};
Sure
Thanks,
sln
^ permalink raw reply
* RE: [PATCH 2/4] mdev: Make mdev alias unique among all mdevs
From: Parav Pandit @ 2019-08-27 18:54 UTC (permalink / raw)
To: Alex Williamson
Cc: Cornelia Huck, Jiri Pirko, kwankhede@nvidia.com,
davem@davemloft.net, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <20190827102435.7bd30ef3@x1.home>
> -----Original Message-----
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Tuesday, August 27, 2019 9:55 PM
> To: Parav Pandit <parav@mellanox.com>
> Cc: Cornelia Huck <cohuck@redhat.com>; Jiri Pirko <jiri@mellanox.com>;
> kwankhede@nvidia.com; davem@davemloft.net; kvm@vger.kernel.org;
> linux-kernel@vger.kernel.org; netdev@vger.kernel.org
> Subject: Re: [PATCH 2/4] mdev: Make mdev alias unique among all mdevs
>
> On Tue, 27 Aug 2019 16:13:27 +0000
> Parav Pandit <parav@mellanox.com> wrote:
>
> > > -----Original Message-----
> > > From: Alex Williamson <alex.williamson@redhat.com>
> > > Sent: Tuesday, August 27, 2019 8:59 PM
> > > To: Cornelia Huck <cohuck@redhat.com>
> > > Cc: Parav Pandit <parav@mellanox.com>; Jiri Pirko
> > > <jiri@mellanox.com>; kwankhede@nvidia.com; davem@davemloft.net;
> > > kvm@vger.kernel.org; linux- kernel@vger.kernel.org;
> > > netdev@vger.kernel.org
> > > Subject: Re: [PATCH 2/4] mdev: Make mdev alias unique among all
> > > mdevs
> > >
> > > On Tue, 27 Aug 2019 13:29:46 +0200
> > > Cornelia Huck <cohuck@redhat.com> wrote:
> > >
> > > > On Tue, 27 Aug 2019 11:08:59 +0000 Parav Pandit
> > > > <parav@mellanox.com> wrote:
> > > >
> > > > > > -----Original Message-----
> > > > > > From: Cornelia Huck <cohuck@redhat.com>
> > > > > > Sent: Tuesday, August 27, 2019 3:59 PM
> > > > > > To: Parav Pandit <parav@mellanox.com>
> > > > > > Cc: alex.williamson@redhat.com; Jiri Pirko
> > > > > > <jiri@mellanox.com>; kwankhede@nvidia.com;
> > > > > > davem@davemloft.net; kvm@vger.kernel.org;
> > > > > > linux- kernel@vger.kernel.org; netdev@vger.kernel.org
> > > > > > Subject: Re: [PATCH 2/4] mdev: Make mdev alias unique among
> > > > > > all mdevs
> > > > > >
> > > > > > On Mon, 26 Aug 2019 15:41:17 -0500 Parav Pandit
> > > > > > <parav@mellanox.com> wrote:
> > > > > >
> > > > > > > Mdev alias should be unique among all the mdevs, so that
> > > > > > > when such alias is used by the mdev users to derive other
> > > > > > > objects, there is no collision in a given system.
> > > > > > >
> > > > > > > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > > > > > > ---
> > > > > > > drivers/vfio/mdev/mdev_core.c | 5 +++++
> > > > > > > 1 file changed, 5 insertions(+)
> > > > > > >
> > > > > > > diff --git a/drivers/vfio/mdev/mdev_core.c
> > > > > > > b/drivers/vfio/mdev/mdev_core.c index
> > > > > > > e825ff38b037..6eb37f0c6369
> > > > > > > 100644
> > > > > > > --- a/drivers/vfio/mdev/mdev_core.c
> > > > > > > +++ b/drivers/vfio/mdev/mdev_core.c
> > > > > > > @@ -375,6 +375,11 @@ int mdev_device_create(struct kobject
> > > > > > > *kobj,
> > > struct
> > > > > > device *dev,
> > > > > > > ret = -EEXIST;
> > > > > > > goto mdev_fail;
> > > > > > > }
> > > > > > > + if (tmp->alias && strcmp(tmp->alias, alias) == 0) {
> > > > > >
> > > > > > Any way we can relay to the caller that the uuid was fine, but
> > > > > > that we had a hash collision? Duplicate uuids are much more
> > > > > > obvious than
> > > a collision here.
> > > > > >
> > > > > How do you want to relay this rare event?
> > > > > Netlink interface has way to return the error message back, but
> > > > > sysfs is
> > > limited due to its error code based interface.
> > > >
> > > > I don't know, that's why I asked :)
> > > >
> > > > The problem is that "uuid already used" and "hash collision" are
> > > > indistinguishable. While "use a different uuid" will probably work
> > > > in both cases, "increase alias length" might be a good alternative
> > > > in some cases.
> > > >
> > > > But if there is no good way to relay the problem, we can live with it.
> > >
> > > It's a rare event, maybe just dev_dbg(dev, "Hash collision creating alias
> \"%s\"
> > > for mdev device %pUl\n",...
> > >
> > Ok.
> > dev_dbg_once() to avoid message flood.
>
> I'd suggest a rate-limit rather than a once. The fact that the kernel may have
> experienced a collision at some time in the past does not help someone
> debug why they can't create a device now. The only way we're going to get a
> flood is if a user sufficiently privileged to create mdev devices stumbles onto
> a collision and continues to repeat the same operation. That falls into
> shoot-yourself-in-the-foot behavior imo.
> Thanks,
>
Ok. Will do.
^ permalink raw reply
* Re: [PATCH v5 net-next 14/18] ionic: Add Tx and Rx handling
From: Shannon Nelson @ 2019-08-27 18:52 UTC (permalink / raw)
To: Yunsheng Lin, netdev, davem
In-Reply-To: <664bbe2c-0e28-6e4a-a44e-c498259be842@huawei.com>
On 8/26/19 7:32 PM, Yunsheng Lin wrote:
> On 2019/8/27 5:33, Shannon Nelson wrote:
>> Add both the Tx and Rx queue setup and handling. The related
>> stats display comes later. Instead of using the generic napi
>> routines used by the slow-path commands, the Tx and Rx paths
>> are simplified and inlined in one file in order to get better
>> compiler optimizations.
>>
>> Signed-off-by: Shannon Nelson <snelson@pensando.io>
>> ---
[...]
>> +static int ionic_txrx_init(struct ionic_lif *lif)
>> +{
>> + unsigned int i;
>> + int err;
>> +
>> + for (i = 0; i < lif->nxqs; i++) {
>> + err = ionic_lif_txq_init(lif, lif->txqcqs[i].qcq);
>> + if (err)
>> + goto err_out;
>> +
>> + err = ionic_lif_rxq_init(lif, lif->rxqcqs[i].qcq);
>> + if (err) {
>> + ionic_lif_qcq_deinit(lif, lif->txqcqs[i-1].qcq);
>> + goto err_out;
>> + }
>> + }
>> +
>> + ionic_set_rx_mode(lif->netdev);
>> +
>> + return 0;
>> +
>> +err_out:
>> + for (i--; i > 0; i--) {
>> + ionic_lif_qcq_deinit(lif, lif->txqcqs[i-1].qcq);
>> + ionic_lif_qcq_deinit(lif, lif->rxqcqs[i-1].qcq);
>> + }
> The "i--" has been done in for initialization, and
> ionic_lif_qcq_deinit is called with lif->rxqcqs[i-1], which may
> cause the last lif->txqcqs or lif->rxqcqs not initialized problem.
>
> It may be more common to do the below:
> while (i--) {
> ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
> ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq);
> }
Sure.
>> +
>> + return err;
>> +}
>> +
>> +static int ionic_txrx_enable(struct ionic_lif *lif)
>> +{
>> + int i, err;
>> +
>> + for (i = 0; i < lif->nxqs; i++) {
>> + err = ionic_qcq_enable(lif->txqcqs[i].qcq);
>> + if (err)
>> + goto err_out;
>> +
>> + ionic_rx_fill(&lif->rxqcqs[i].qcq->q);
>> + err = ionic_qcq_enable(lif->rxqcqs[i].qcq);
>> + if (err) {
>> + ionic_qcq_disable(lif->txqcqs[i].qcq);
>> + goto err_out;
>> + }
>> + }
>> +
>> + return 0;
>> +
>> +err_out:
>> + for (i--; i >= 0 ; i--) {
>> + ionic_qcq_disable(lif->rxqcqs[i].qcq);
>> + ionic_qcq_disable(lif->txqcqs[i].qcq);
>> + }
> It may be better to use the above pattern too.
Okay
>> +static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, void *data, size_t len)
>> +{
>> + struct ionic_tx_stats *stats = q_to_tx_stats(q);
>> + struct device *dev = q->lif->ionic->dev;
>> + dma_addr_t dma_addr;
>> +
>> + dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE);
>> + if (dma_mapping_error(dev, dma_addr)) {
>> + net_warn_ratelimited("%s: DMA single map failed on %s!\n",
>> + q->lif->netdev->name, q->name);
>> + stats->dma_map_err++;
>> + return 0;
> zero may be a valid dma address, maybe check the dma_mapping_error in
> ionic_tx_tso instead.
Hmmm, hadn't thought of 0 as a valid address...
I'll need to make a similar adjustment to ionic_tx_map_frag() uses.
>
>
> +
> +static void ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb)
> +{
> + skb_cow_head(skb, 0);
> May need to check for return error of skb_cow_head.
Sure, and in both places.
Thanks,
sln
^ permalink raw reply
* Re: [PATCH v2 1/1] netfilter: nf_tables: fib: Drop IPV6 packages if IPv6 is disabled on boot
From: Pablo Neira Ayuso @ 2019-08-27 18:51 UTC (permalink / raw)
To: Leonardo Bras
Cc: netfilter-devel, coreteam, netdev, linux-kernel, Jozsef Kadlecsik,
Florian Westphal, David S. Miller, Alexey Kuznetsov,
Hideaki YOSHIFUJI
In-Reply-To: <77c43754ff72e9a2e8048ccd032351cf0186080a.camel@linux.ibm.com>
On Tue, Aug 27, 2019 at 02:34:14PM -0300, Leonardo Bras wrote:
> On Tue, 2019-08-27 at 12:35 +0200, Pablo Neira Ayuso wrote:
[...]
> > NFT_BREAK instead to stop evaluating this rule, this results in a
> > mismatch, so you let the user decide what to do with packets that do
> > not match your policy.
>
> Ok, I will replace for v3.
Thanks.
> > The drop case at the bottom of the fib eval function never actually
> > never happens.
>
> Which one do you mean?
Line 31 of net/netfilter/nft_fib_inet.c.
^ permalink raw reply
* [PATCH net] net: sched: act_sample: fix psample group handling on overwrite
From: Vlad Buslov @ 2019-08-27 18:49 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, dcaratti, Vlad Buslov
Action sample doesn't properly handle psample_group pointer in overwrite
case. Following issues need to be fixed:
- In the tcf_sample_init() function, RCU_INIT_POINTER() is used to set
s->psample_group, even though we are neither setting the pointer to NULL nor
preventing concurrent readers from accessing the pointer in some way.
Use rcu_swap_protected() instead to safely reset the pointer.
- The old value of s->psample_group is not released or deallocated in any way,
which results in a resource leak. Use psample_group_put() on the non-NULL value
obtained with rcu_swap_protected().
- The function psample_group_put(), which releases the reference to the struct
psample_group pointed to by rcu-pointer s->psample_group, doesn't respect the rcu
grace period when deallocating it. Extend struct psample_group with an rcu
head and use kfree_rcu when freeing it.
Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
include/net/psample.h | 1 +
net/psample/psample.c | 2 +-
net/sched/act_sample.c | 6 +++++-
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/include/net/psample.h b/include/net/psample.h
index 37a4df2325b2..6b578ce69cd8 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -11,6 +11,7 @@ struct psample_group {
u32 group_num;
u32 refcount;
u32 seq;
+ struct rcu_head rcu;
};
struct psample_group *psample_group_get(struct net *net, u32 group_num);
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 841f198ea1a8..66e4b61a350d 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -154,7 +154,7 @@ static void psample_group_destroy(struct psample_group *group)
{
psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP);
list_del(&group->list);
- kfree(group);
+ kfree_rcu(group, rcu);
}
static struct psample_group *
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 595308d60133..b75377d8c596 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -102,13 +102,17 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
s->rate = rate;
s->psample_group_num = psample_group_num;
- RCU_INIT_POINTER(s->psample_group, psample_group);
+ rcu_swap_protected(s->psample_group, psample_group,
+ lockdep_is_held(&s->tcf_lock));
if (tb[TCA_SAMPLE_TRUNC_SIZE]) {
s->truncate = true;
s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]);
}
spin_unlock_bh(&s->tcf_lock);
+
+ if (psample_group)
+ psample_group_put(psample_group);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 4/4] r8169: add support for EEE on RTL8125
From: Heiner Kallweit @ 2019-08-27 18:42 UTC (permalink / raw)
To: Realtek linux nic maintainers, David Miller
Cc: netdev@vger.kernel.org, Chun-Hao Lin
In-Reply-To: <55099fc6-1e29-4023-337c-98fc04189e5e@gmail.com>
This adds EEE support for RTL8125 based on the vendor driver.
EEE is supported for 100Mbps and 1Gbps. Realtek recommended not enabling
EEE for 2.5Gbps yet due to potential compatibility issues. Also,
ethtool doesn't yet support controlling EEE for 2.5Gbps and 5Gbps.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
drivers/net/ethernet/realtek/r8169_main.c | 24 +++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index d9adc45fa..b00dbee0c 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2271,6 +2271,12 @@ static void rtl8168_config_eee_mac(struct rtl8169_private *tp)
rtl_eri_set_bits(tp, 0x1b0, ERIAR_MASK_1111, 0x0003);
}
+static void rtl8125_config_eee_mac(struct rtl8169_private *tp)
+{
+ r8168_mac_ocp_modify(tp, 0xe040, 0, BIT(1) | BIT(0));
+ r8168_mac_ocp_modify(tp, 0xeb62, 0, BIT(2) | BIT(1));
+}
+
static void rtl8168f_config_eee_phy(struct rtl8169_private *tp)
{
struct phy_device *phydev = tp->phydev;
@@ -2301,6 +2307,16 @@ static void rtl8168h_config_eee_phy(struct rtl8169_private *tp)
phy_modify_paged(phydev, 0xa42, 0x14, 0x0000, 0x0080);
}
+static void rtl8125_config_eee_phy(struct rtl8169_private *tp)
+{
+ struct phy_device *phydev = tp->phydev;
+
+ rtl8168h_config_eee_phy(tp);
+
+ phy_modify_paged(phydev, 0xa6d, 0x12, 0x0001, 0x0000);
+ phy_modify_paged(phydev, 0xa6d, 0x14, 0x0010, 0x0000);
+}
+
static void rtl8169s_hw_phy_config(struct rtl8169_private *tp)
{
static const struct phy_reg phy_reg_init[] = {
@@ -3672,6 +3688,9 @@ static void rtl8125_1_hw_phy_config(struct rtl8169_private *tp)
phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00);
phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000);
phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
+
+ rtl8125_config_eee_phy(tp);
+ rtl_enable_eee(tp);
}
static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp)
@@ -3741,6 +3760,9 @@ static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp)
phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000);
phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000);
phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
+
+ rtl8125_config_eee_phy(tp);
+ rtl_enable_eee(tp);
}
static void rtl_hw_phy_config(struct net_device *dev)
@@ -5263,6 +5285,8 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
rtl_udelay_loop_wait_low(tp, &rtl_mac_ocp_e00e_cond, 1000, 10);
+ rtl8125_config_eee_mac(tp);
+
RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
udelay(10);
}
--
2.23.0
^ permalink raw reply related
* [PATCH net-next 3/4] r8169: add RTL8125 PHY initialization
From: Heiner Kallweit @ 2019-08-27 18:42 UTC (permalink / raw)
To: Realtek linux nic maintainers, David Miller
Cc: netdev@vger.kernel.org, Chun-Hao Lin
In-Reply-To: <55099fc6-1e29-4023-337c-98fc04189e5e@gmail.com>
This patch adds PHY initialization magic copied from the r8125 vendor
driver. In addition it supports loading the firmware for chip version
RTL_GIGA_MAC_VER_61.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
drivers/net/ethernet/realtek/r8169_main.c | 130 +++++++++++++++++++++-
1 file changed, 127 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index e7e953b7c..d9adc45fa 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -55,6 +55,7 @@
#define FIRMWARE_8168H_2 "rtl_nic/rtl8168h-2.fw"
#define FIRMWARE_8107E_1 "rtl_nic/rtl8107e-1.fw"
#define FIRMWARE_8107E_2 "rtl_nic/rtl8107e-2.fw"
+#define FIRMWARE_8125A_3 "rtl_nic/rtl8125a-3.fw"
#define R8169_MSG_DEFAULT \
(NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN)
@@ -203,7 +204,7 @@ static const struct {
[RTL_GIGA_MAC_VER_50] = {"RTL8168ep/8111ep" },
[RTL_GIGA_MAC_VER_51] = {"RTL8168ep/8111ep" },
[RTL_GIGA_MAC_VER_60] = {"RTL8125" },
- [RTL_GIGA_MAC_VER_61] = {"RTL8125" },
+ [RTL_GIGA_MAC_VER_61] = {"RTL8125", FIRMWARE_8125A_3},
};
static const struct pci_device_id rtl8169_pci_tbl[] = {
@@ -714,6 +715,7 @@ MODULE_FIRMWARE(FIRMWARE_8168H_1);
MODULE_FIRMWARE(FIRMWARE_8168H_2);
MODULE_FIRMWARE(FIRMWARE_8107E_1);
MODULE_FIRMWARE(FIRMWARE_8107E_2);
+MODULE_FIRMWARE(FIRMWARE_8125A_3);
static inline struct device *tp_to_dev(struct rtl8169_private *tp)
{
@@ -3619,6 +3621,128 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000);
}
+static void rtl8125_1_hw_phy_config(struct rtl8169_private *tp)
+{
+ struct phy_device *phydev = tp->phydev;
+
+ phy_modify_paged(phydev, 0xad4, 0x10, 0x03ff, 0x0084);
+ phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010);
+ phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x0006);
+ phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006);
+ phy_modify_paged(phydev, 0xac0, 0x14, 0x0000, 0x1100);
+ phy_modify_paged(phydev, 0xac8, 0x15, 0xf000, 0x7000);
+ phy_modify_paged(phydev, 0xad1, 0x14, 0x0000, 0x0400);
+ phy_modify_paged(phydev, 0xad1, 0x15, 0x0000, 0x03ff);
+ phy_modify_paged(phydev, 0xad1, 0x16, 0x0000, 0x03ff);
+
+ phy_write(phydev, 0x1f, 0x0a43);
+ phy_write(phydev, 0x13, 0x80ea);
+ phy_modify(phydev, 0x14, 0xff00, 0xc400);
+ phy_write(phydev, 0x13, 0x80eb);
+ phy_modify(phydev, 0x14, 0x0700, 0x0300);
+ phy_write(phydev, 0x13, 0x80f8);
+ phy_modify(phydev, 0x14, 0xff00, 0x1c00);
+ phy_write(phydev, 0x13, 0x80f1);
+ phy_modify(phydev, 0x14, 0xff00, 0x3000);
+ phy_write(phydev, 0x13, 0x80fe);
+ phy_modify(phydev, 0x14, 0xff00, 0xa500);
+ phy_write(phydev, 0x13, 0x8102);
+ phy_modify(phydev, 0x14, 0xff00, 0x5000);
+ phy_write(phydev, 0x13, 0x8105);
+ phy_modify(phydev, 0x14, 0xff00, 0x3300);
+ phy_write(phydev, 0x13, 0x8100);
+ phy_modify(phydev, 0x14, 0xff00, 0x7000);
+ phy_write(phydev, 0x13, 0x8104);
+ phy_modify(phydev, 0x14, 0xff00, 0xf000);
+ phy_write(phydev, 0x13, 0x8106);
+ phy_modify(phydev, 0x14, 0xff00, 0x6500);
+ phy_write(phydev, 0x13, 0x80dc);
+ phy_modify(phydev, 0x14, 0xff00, 0xed00);
+ phy_write(phydev, 0x13, 0x80df);
+ phy_set_bits(phydev, 0x14, BIT(8));
+ phy_write(phydev, 0x13, 0x80e1);
+ phy_clear_bits(phydev, 0x14, BIT(8));
+ phy_write(phydev, 0x1f, 0x0000);
+
+ phy_modify_paged(phydev, 0xbf0, 0x13, 0x003f, 0x0038);
+ phy_write_paged(phydev, 0xa43, 0x13, 0x819f);
+ phy_write_paged(phydev, 0xa43, 0x14, 0xd0b6);
+
+ phy_write_paged(phydev, 0xbc3, 0x12, 0x5555);
+ phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00);
+ phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000);
+ phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
+}
+
+static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp)
+{
+ struct phy_device *phydev = tp->phydev;
+ int i;
+
+ phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010);
+ phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x03ff);
+ phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006);
+ phy_modify_paged(phydev, 0xac0, 0x14, 0x1100, 0x0000);
+ phy_modify_paged(phydev, 0xacc, 0x10, 0x0003, 0x0002);
+ phy_modify_paged(phydev, 0xad4, 0x10, 0x00e7, 0x0044);
+ phy_modify_paged(phydev, 0xac1, 0x12, 0x0080, 0x0000);
+ phy_modify_paged(phydev, 0xac8, 0x10, 0x0300, 0x0000);
+ phy_modify_paged(phydev, 0xac5, 0x17, 0x0007, 0x0002);
+ phy_write_paged(phydev, 0xad4, 0x16, 0x00a8);
+ phy_write_paged(phydev, 0xac5, 0x16, 0x01ff);
+ phy_modify_paged(phydev, 0xac8, 0x15, 0x00f0, 0x0030);
+
+ phy_write(phydev, 0x1f, 0x0b87);
+ phy_write(phydev, 0x16, 0x80a2);
+ phy_write(phydev, 0x17, 0x0153);
+ phy_write(phydev, 0x16, 0x809c);
+ phy_write(phydev, 0x17, 0x0153);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ phy_write(phydev, 0x1f, 0x0a43);
+ phy_write(phydev, 0x13, 0x81B3);
+ phy_write(phydev, 0x14, 0x0043);
+ phy_write(phydev, 0x14, 0x00A7);
+ phy_write(phydev, 0x14, 0x00D6);
+ phy_write(phydev, 0x14, 0x00EC);
+ phy_write(phydev, 0x14, 0x00F6);
+ phy_write(phydev, 0x14, 0x00FB);
+ phy_write(phydev, 0x14, 0x00FD);
+ phy_write(phydev, 0x14, 0x00FF);
+ phy_write(phydev, 0x14, 0x00BB);
+ phy_write(phydev, 0x14, 0x0058);
+ phy_write(phydev, 0x14, 0x0029);
+ phy_write(phydev, 0x14, 0x0013);
+ phy_write(phydev, 0x14, 0x0009);
+ phy_write(phydev, 0x14, 0x0004);
+ phy_write(phydev, 0x14, 0x0002);
+ for (i = 0; i < 25; i++)
+ phy_write(phydev, 0x14, 0x0000);
+
+ phy_write(phydev, 0x13, 0x8257);
+ phy_write(phydev, 0x14, 0x020F);
+
+ phy_write(phydev, 0x13, 0x80EA);
+ phy_write(phydev, 0x14, 0x7843);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ rtl_apply_firmware(tp);
+
+ phy_modify_paged(phydev, 0xd06, 0x14, 0x0000, 0x2000);
+
+ phy_write(phydev, 0x1f, 0x0a43);
+ phy_write(phydev, 0x13, 0x81a2);
+ phy_set_bits(phydev, 0x14, BIT(8));
+ phy_write(phydev, 0x1f, 0x0000);
+
+ phy_modify_paged(phydev, 0xb54, 0x16, 0xff00, 0xdb00);
+ phy_modify_paged(phydev, 0xa45, 0x12, 0x0001, 0x0000);
+ phy_modify_paged(phydev, 0xa5d, 0x12, 0x0000, 0x0020);
+ phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000);
+ phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000);
+ phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
+}
+
static void rtl_hw_phy_config(struct net_device *dev)
{
static const rtl_generic_fct phy_configs[] = {
@@ -3674,8 +3798,8 @@ static void rtl_hw_phy_config(struct net_device *dev)
[RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config,
[RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config,
[RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_60] = NULL,
- [RTL_GIGA_MAC_VER_61] = NULL,
+ [RTL_GIGA_MAC_VER_60] = rtl8125_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_61] = rtl8125_2_hw_phy_config,
};
struct rtl8169_private *tp = netdev_priv(dev);
--
2.23.0
^ permalink raw reply related
* [PATCH net-next 2/4] r8169: add support for RTL8125
From: Heiner Kallweit @ 2019-08-27 18:41 UTC (permalink / raw)
To: Realtek linux nic maintainers, David Miller
Cc: netdev@vger.kernel.org, Chun-Hao Lin
In-Reply-To: <55099fc6-1e29-4023-337c-98fc04189e5e@gmail.com>
This adds support for the 2.5Gbps chip RTL8125. It's partially based on the
r8125 vendor driver. Tested with a Delock 89531 PCIe card against a
Netgear GS110MX Multi-Gig switch. Firmware isn't strictly needed,
but on some systems there may be compatibility issues w/o firmware.
Firmware has been submitted to linux-firmware.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
drivers/net/ethernet/realtek/Kconfig | 9 +-
drivers/net/ethernet/realtek/r8169_main.c | 274 ++++++++++++++++++++--
2 files changed, 265 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig
index b18e7a91d..5e0b9d2f1 100644
--- a/drivers/net/ethernet/realtek/Kconfig
+++ b/drivers/net/ethernet/realtek/Kconfig
@@ -96,14 +96,19 @@ config 8139_OLD_RX_RESET
old RX-reset behavior. If unsure, say N.
config R8169
- tristate "Realtek 8169 gigabit ethernet support"
+ tristate "Realtek 8169/8168/8101/8125 ethernet support"
depends on PCI
select FW_LOADER
select CRC32
select PHYLIB
select REALTEK_PHY
---help---
- Say Y here if you have a Realtek 8169 PCI Gigabit Ethernet adapter.
+ Say Y here if you have a Realtek Ethernet adapter belonging to
+ the following families:
+ RTL8169 Gigabit Ethernet
+ RTL8168 Gigabit Ethernet
+ RTL8101 Fast Ethernet
+ RTL8125 2.5GBit Ethernet
To compile this driver as a module, choose M here: the module
will be called r8169. This is recommended.
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 32b444d13..e7e953b7c 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -135,6 +135,8 @@ enum mac_version {
RTL_GIGA_MAC_VER_49,
RTL_GIGA_MAC_VER_50,
RTL_GIGA_MAC_VER_51,
+ RTL_GIGA_MAC_VER_60,
+ RTL_GIGA_MAC_VER_61,
RTL_GIGA_MAC_NONE
};
@@ -200,6 +202,8 @@ static const struct {
[RTL_GIGA_MAC_VER_49] = {"RTL8168ep/8111ep" },
[RTL_GIGA_MAC_VER_50] = {"RTL8168ep/8111ep" },
[RTL_GIGA_MAC_VER_51] = {"RTL8168ep/8111ep" },
+ [RTL_GIGA_MAC_VER_60] = {"RTL8125" },
+ [RTL_GIGA_MAC_VER_61] = {"RTL8125" },
};
static const struct pci_device_id rtl8169_pci_tbl[] = {
@@ -220,6 +224,8 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
{ PCI_VDEVICE(USR, 0x0116) },
{ PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, 0x0024 },
{ 0x0001, 0x8168, PCI_ANY_ID, 0x2410 },
+ { PCI_VDEVICE(REALTEK, 0x8125) },
+ { PCI_VDEVICE(REALTEK, 0x3000) },
{}
};
@@ -384,6 +390,19 @@ enum rtl8168_registers {
#define EARLY_TALLY_EN (1 << 16)
};
+enum rtl8125_registers {
+ IntrMask_8125 = 0x38,
+ IntrStatus_8125 = 0x3c,
+ TxPoll_8125 = 0x90,
+ MAC0_BKP = 0x19e0,
+};
+
+#define RX_VLAN_INNER_8125 BIT(22)
+#define RX_VLAN_OUTER_8125 BIT(23)
+#define RX_VLAN_8125 (RX_VLAN_INNER_8125 | RX_VLAN_OUTER_8125)
+
+#define RX_FETCH_DFLT_8125 (8 << 27)
+
enum rtl_register_content {
/* InterruptStatusBits */
SYSErr = 0x8000,
@@ -727,6 +746,11 @@ static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
PCI_EXP_DEVCTL_READRQ, force);
}
+static bool rtl_is_8125(struct rtl8169_private *tp)
+{
+ return tp->mac_version >= RTL_GIGA_MAC_VER_60;
+}
+
static bool rtl_is_8168evl_up(struct rtl8169_private *tp)
{
return tp->mac_version >= RTL_GIGA_MAC_VER_34 &&
@@ -1023,7 +1047,7 @@ static void rtl_writephy(struct rtl8169_private *tp, int location, int val)
case RTL_GIGA_MAC_VER_31:
r8168dp_2_mdio_write(tp, location, val);
break;
- case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_61:
r8168g_mdio_write(tp, location, val);
break;
default:
@@ -1040,7 +1064,7 @@ static int rtl_readphy(struct rtl8169_private *tp, int location)
case RTL_GIGA_MAC_VER_28:
case RTL_GIGA_MAC_VER_31:
return r8168dp_2_mdio_read(tp, location);
- case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_61:
return r8168g_mdio_read(tp, location);
default:
return r8169_mdio_read(tp, location);
@@ -1324,17 +1348,26 @@ static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
static u32 rtl_get_events(struct rtl8169_private *tp)
{
- return RTL_R16(tp, IntrStatus);
+ if (rtl_is_8125(tp))
+ return RTL_R32(tp, IntrStatus_8125);
+ else
+ return RTL_R16(tp, IntrStatus);
}
static void rtl_ack_events(struct rtl8169_private *tp, u32 bits)
{
- RTL_W16(tp, IntrStatus, bits);
+ if (rtl_is_8125(tp))
+ RTL_W32(tp, IntrStatus_8125, bits);
+ else
+ RTL_W16(tp, IntrStatus, bits);
}
static void rtl_irq_disable(struct rtl8169_private *tp)
{
- RTL_W16(tp, IntrMask, 0);
+ if (rtl_is_8125(tp))
+ RTL_W32(tp, IntrMask_8125, 0);
+ else
+ RTL_W16(tp, IntrMask, 0);
tp->irq_enabled = 0;
}
@@ -1345,7 +1378,10 @@ static void rtl_irq_disable(struct rtl8169_private *tp)
static void rtl_irq_enable(struct rtl8169_private *tp)
{
tp->irq_enabled = 1;
- RTL_W16(tp, IntrMask, tp->irq_mask);
+ if (rtl_is_8125(tp))
+ RTL_W32(tp, IntrMask_8125, tp->irq_mask);
+ else
+ RTL_W16(tp, IntrMask, tp->irq_mask);
}
static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
@@ -1410,7 +1446,6 @@ static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
{
- unsigned int i, tmp;
static const struct {
u32 opt;
u16 reg;
@@ -1423,20 +1458,25 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
{ WAKE_ANY, Config5, LanWake },
{ WAKE_MAGIC, Config3, MagicPacket }
};
+ unsigned int i, tmp = ARRAY_SIZE(cfg);
u8 options;
rtl_unlock_config_regs(tp);
if (rtl_is_8168evl_up(tp)) {
- tmp = ARRAY_SIZE(cfg) - 1;
+ tmp--;
if (wolopts & WAKE_MAGIC)
rtl_eri_set_bits(tp, 0x0dc, ERIAR_MASK_0100,
MagicPacket_v2);
else
rtl_eri_clear_bits(tp, 0x0dc, ERIAR_MASK_0100,
MagicPacket_v2);
- } else {
- tmp = ARRAY_SIZE(cfg);
+ } else if (rtl_is_8125(tp)) {
+ tmp--;
+ if (wolopts & WAKE_MAGIC)
+ r8168_mac_ocp_modify(tp, 0xc0b6, 0, BIT(0));
+ else
+ r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0);
}
for (i = 0; i < tmp; i++) {
@@ -1542,6 +1582,13 @@ static int rtl8169_set_features(struct net_device *dev,
else
rx_config &= ~(AcceptErr | AcceptRunt);
+ if (rtl_is_8125(tp)) {
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ rx_config |= RX_VLAN_8125;
+ else
+ rx_config &= ~RX_VLAN_8125;
+ }
+
RTL_W32(tp, RxConfig, rx_config);
if (features & NETIF_F_RXCSUM)
@@ -1549,10 +1596,12 @@ static int rtl8169_set_features(struct net_device *dev,
else
tp->cp_cmd &= ~RxChkSum;
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- tp->cp_cmd |= RxVlan;
- else
- tp->cp_cmd &= ~RxVlan;
+ if (!rtl_is_8125(tp)) {
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ tp->cp_cmd |= RxVlan;
+ else
+ tp->cp_cmd &= ~RxVlan;
+ }
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
RTL_R16(tp, CPlusCmd);
@@ -1851,6 +1900,9 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
int i;
u16 w;
+ if (rtl_is_8125(tp))
+ return -EOPNOTSUPP;
+
memset(ec, 0, sizeof(*ec));
/* get rx/tx scale corresponding to current speed and CPlusCmd[0:1] */
@@ -1919,6 +1971,9 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
u16 w = 0, cp01;
int i;
+ if (rtl_is_8125(tp))
+ return -EOPNOTSUPP;
+
scale = rtl_coalesce_choose_scale(dev,
max(p[0].usecs, p[1].usecs) * 1000, &cp01);
if (IS_ERR(scale))
@@ -2065,6 +2120,10 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp)
u16 val;
u16 mac_version;
} mac_info[] = {
+ /* 8125 family. */
+ { 0x7cf, 0x608, RTL_GIGA_MAC_VER_60 },
+ { 0x7c8, 0x608, RTL_GIGA_MAC_VER_61 },
+
/* 8168EP family. */
{ 0x7cf, 0x502, RTL_GIGA_MAC_VER_51 },
{ 0x7cf, 0x501, RTL_GIGA_MAC_VER_50 },
@@ -3615,6 +3674,8 @@ static void rtl_hw_phy_config(struct net_device *dev)
[RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config,
[RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config,
[RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_60] = NULL,
+ [RTL_GIGA_MAC_VER_61] = NULL,
};
struct rtl8169_private *tp = netdev_priv(dev);
@@ -3742,6 +3803,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_48:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_60:
+ case RTL_GIGA_MAC_VER_61:
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
case RTL_GIGA_MAC_VER_40:
@@ -3771,6 +3834,8 @@ static void rtl_pll_power_up(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_48:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_60:
+ case RTL_GIGA_MAC_VER_61:
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
break;
case RTL_GIGA_MAC_VER_40:
@@ -3803,6 +3868,10 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
break;
+ case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_61:
+ RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_VLAN_8125 |
+ RX_DMA_BURST);
+ break;
default:
RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
break;
@@ -5020,6 +5089,126 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
rtl_hw_aspm_clkreq_enable(tp, true);
}
+DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)
+{
+ return r8168_mac_ocp_read(tp, 0xe00e) & BIT(13);
+}
+
+static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
+{
+ rtl_pcie_state_l2l3_disable(tp);
+
+ RTL_W16(tp, 0x382, 0x221b);
+ RTL_W8(tp, 0x4500, 0);
+ RTL_W16(tp, 0x4800, 0);
+
+ /* disable UPS */
+ r8168_mac_ocp_modify(tp, 0xd40a, 0x0010, 0x0000);
+
+ RTL_W8(tp, Config1, RTL_R8(tp, Config1) & ~0x10);
+
+ r8168_mac_ocp_write(tp, 0xc140, 0xffff);
+ r8168_mac_ocp_write(tp, 0xc142, 0xffff);
+
+ r8168_mac_ocp_modify(tp, 0xd3e2, 0x0fff, 0x03a9);
+ r8168_mac_ocp_modify(tp, 0xd3e4, 0x00ff, 0x0000);
+ r8168_mac_ocp_modify(tp, 0xe860, 0x0000, 0x0080);
+
+ /* disable new tx descriptor format */
+ r8168_mac_ocp_modify(tp, 0xeb58, 0x0001, 0x0000);
+
+ r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0400);
+ r8168_mac_ocp_modify(tp, 0xe63e, 0x0c30, 0x0020);
+ r8168_mac_ocp_modify(tp, 0xc0b4, 0x0000, 0x000c);
+ r8168_mac_ocp_modify(tp, 0xeb6a, 0x00ff, 0x0033);
+ r8168_mac_ocp_modify(tp, 0xeb50, 0x03e0, 0x0040);
+ r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0030);
+ r8168_mac_ocp_modify(tp, 0xe040, 0x1000, 0x0000);
+ r8168_mac_ocp_modify(tp, 0xe0c0, 0x4f0f, 0x4403);
+ r8168_mac_ocp_modify(tp, 0xe052, 0x0080, 0x0067);
+ r8168_mac_ocp_modify(tp, 0xc0ac, 0x0080, 0x1f00);
+ r8168_mac_ocp_modify(tp, 0xd430, 0x0fff, 0x047f);
+ r8168_mac_ocp_modify(tp, 0xe84c, 0x0000, 0x00c0);
+ r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000);
+ r8168_mac_ocp_modify(tp, 0xeb54, 0x0000, 0x0001);
+ udelay(1);
+ r8168_mac_ocp_modify(tp, 0xeb54, 0x0001, 0x0000);
+ RTL_W16(tp, 0x1880, RTL_R16(tp, 0x1880) & ~0x0030);
+
+ r8168_mac_ocp_write(tp, 0xe098, 0xc302);
+
+ rtl_udelay_loop_wait_low(tp, &rtl_mac_ocp_e00e_cond, 1000, 10);
+
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+ udelay(10);
+}
+
+static void rtl_hw_start_8125_1(struct rtl8169_private *tp)
+{
+ static const struct ephy_info e_info_8125_1[] = {
+ { 0x01, 0xffff, 0xa812 },
+ { 0x09, 0xffff, 0x520c },
+ { 0x04, 0xffff, 0xd000 },
+ { 0x0d, 0xffff, 0xf702 },
+ { 0x0a, 0xffff, 0x8653 },
+ { 0x06, 0xffff, 0x001e },
+ { 0x08, 0xffff, 0x3595 },
+ { 0x20, 0xffff, 0x9455 },
+ { 0x21, 0xffff, 0x99ff },
+ { 0x02, 0xffff, 0x6046 },
+ { 0x29, 0xffff, 0xfe00 },
+ { 0x23, 0xffff, 0xab62 },
+
+ { 0x41, 0xffff, 0xa80c },
+ { 0x49, 0xffff, 0x520c },
+ { 0x44, 0xffff, 0xd000 },
+ { 0x4d, 0xffff, 0xf702 },
+ { 0x4a, 0xffff, 0x8653 },
+ { 0x46, 0xffff, 0x001e },
+ { 0x48, 0xffff, 0x3595 },
+ { 0x60, 0xffff, 0x9455 },
+ { 0x61, 0xffff, 0x99ff },
+ { 0x42, 0xffff, 0x6046 },
+ { 0x69, 0xffff, 0xfe00 },
+ { 0x63, 0xffff, 0xab62 },
+ };
+
+ rtl_set_def_aspm_entry_latency(tp);
+
+ /* disable aspm and clock request before access ephy */
+ rtl_hw_aspm_clkreq_enable(tp, false);
+ rtl_ephy_init(tp, e_info_8125_1);
+
+ rtl_hw_start_8125_common(tp);
+}
+
+static void rtl_hw_start_8125_2(struct rtl8169_private *tp)
+{
+ static const struct ephy_info e_info_8125_2[] = {
+ { 0x04, 0xffff, 0xd000 },
+ { 0x0a, 0xffff, 0x8653 },
+ { 0x23, 0xffff, 0xab66 },
+ { 0x20, 0xffff, 0x9455 },
+ { 0x21, 0xffff, 0x99ff },
+ { 0x29, 0xffff, 0xfe04 },
+
+ { 0x44, 0xffff, 0xd000 },
+ { 0x4a, 0xffff, 0x8653 },
+ { 0x63, 0xffff, 0xab66 },
+ { 0x60, 0xffff, 0x9455 },
+ { 0x61, 0xffff, 0x99ff },
+ { 0x69, 0xffff, 0xfe04 },
+ };
+
+ rtl_set_def_aspm_entry_latency(tp);
+
+ /* disable aspm and clock request before access ephy */
+ rtl_hw_aspm_clkreq_enable(tp, false);
+ rtl_ephy_init(tp, e_info_8125_2);
+
+ rtl_hw_start_8125_common(tp);
+}
+
static void rtl_hw_config(struct rtl8169_private *tp)
{
static const rtl_generic_fct hw_configs[] = {
@@ -5068,12 +5257,25 @@ static void rtl_hw_config(struct rtl8169_private *tp)
[RTL_GIGA_MAC_VER_49] = rtl_hw_start_8168ep_1,
[RTL_GIGA_MAC_VER_50] = rtl_hw_start_8168ep_2,
[RTL_GIGA_MAC_VER_51] = rtl_hw_start_8168ep_3,
+ [RTL_GIGA_MAC_VER_60] = rtl_hw_start_8125_1,
+ [RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125_2,
};
if (hw_configs[tp->mac_version])
hw_configs[tp->mac_version](tp);
}
+static void rtl_hw_start_8125(struct rtl8169_private *tp)
+{
+ int i;
+
+ /* disable interrupt coalescing */
+ for (i = 0xa00; i < 0xb00; i += 4)
+ RTL_W32(tp, i, 0);
+
+ rtl_hw_config(tp);
+}
+
static void rtl_hw_start_8168(struct rtl8169_private *tp)
{
if (tp->mac_version == RTL_GIGA_MAC_VER_13 ||
@@ -5127,6 +5329,8 @@ static void rtl_hw_start(struct rtl8169_private *tp)
if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
rtl_hw_start_8169(tp);
+ else if (rtl_is_8125(tp))
+ rtl_hw_start_8125(tp);
else
rtl_hw_start_8168(tp);
@@ -5510,6 +5714,14 @@ static bool rtl_chip_supports_csum_v2(struct rtl8169_private *tp)
}
}
+static void rtl8169_doorbell(struct rtl8169_private *tp)
+{
+ if (rtl_is_8125(tp))
+ RTL_W16(tp, TxPoll_8125, BIT(0));
+ else
+ RTL_W8(tp, TxPoll, NPQ);
+}
+
static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
@@ -5589,7 +5801,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
}
if (door_bell)
- RTL_W8(tp, TxPoll, NPQ);
+ rtl8169_doorbell(tp);
if (unlikely(stop_queue)) {
/* Sync with rtl_tx:
@@ -5751,7 +5963,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
* it is slow enough). -- FR
*/
if (tp->cur_tx != dirty_tx)
- RTL_W8(tp, TxPoll, NPQ);
+ rtl8169_doorbell(tp);
}
}
@@ -6473,6 +6685,8 @@ static void rtl_read_mac_address(struct rtl8169_private *tp,
value = rtl_eri_read(tp, 0xe4);
mac_addr[4] = (value >> 0) & 0xff;
mac_addr[5] = (value >> 8) & 0xff;
+ } else if (rtl_is_8125(tp)) {
+ rtl_read_mac_from_reg(tp, mac_addr, MAC0_BKP);
}
}
@@ -6570,6 +6784,31 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
}
+static void rtl_hw_init_8125(struct rtl8169_private *tp)
+{
+ tp->ocp_base = OCP_STD_PHY_BASE;
+
+ RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
+
+ if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
+ return;
+
+ RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
+ msleep(1);
+ RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+
+ r8168_mac_ocp_modify(tp, 0xe8de, BIT(14), 0);
+
+ if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
+ return;
+
+ r8168_mac_ocp_write(tp, 0xc0aa, 0x07d0);
+ r8168_mac_ocp_write(tp, 0xc0a6, 0x0150);
+ r8168_mac_ocp_write(tp, 0xc01e, 0x5555);
+
+ rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
+}
+
static void rtl_hw_initialize(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
@@ -6579,6 +6818,9 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48:
rtl_hw_init_8168g(tp);
break;
+ case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_61:
+ rtl_hw_init_8125(tp);
+ break;
default:
break;
}
--
2.23.0
^ permalink raw reply related
* [PATCH net-next 1/4] r8169: prepare for adding RTL8125 support
From: Heiner Kallweit @ 2019-08-27 18:41 UTC (permalink / raw)
To: Realtek linux nic maintainers, David Miller
Cc: netdev@vger.kernel.org, Chun-Hao Lin
In-Reply-To: <55099fc6-1e29-4023-337c-98fc04189e5e@gmail.com>
This patch prepares the driver for adding RTL8125 support:
- change type of interrupt mask to u32
- restrict rtl_is_8168evl_up to RTL8168 chip versions
- factor out reading MAC address from registers
- re-add function rtl_get_events
- move disabling interrupt coalescing to RTL8169/RTL8168 init
- read different register for PCI commit
- don't use bit LastFrag in tx descriptor after send, RTL8125 clears it
No functional change intended.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
drivers/net/ethernet/realtek/r8169_main.c | 44 ++++++++++++++++-------
1 file changed, 31 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index faa4041cf..32b444d13 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -645,7 +645,7 @@ struct rtl8169_private {
struct page *Rx_databuff[NUM_RX_DESC]; /* Rx data buffers */
struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */
u16 cp_cmd;
- u16 irq_mask;
+ u32 irq_mask;
struct clk *clk;
struct {
@@ -730,7 +730,8 @@ static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
static bool rtl_is_8168evl_up(struct rtl8169_private *tp)
{
return tp->mac_version >= RTL_GIGA_MAC_VER_34 &&
- tp->mac_version != RTL_GIGA_MAC_VER_39;
+ tp->mac_version != RTL_GIGA_MAC_VER_39 &&
+ tp->mac_version <= RTL_GIGA_MAC_VER_51;
}
static bool rtl_supports_eee(struct rtl8169_private *tp)
@@ -740,6 +741,14 @@ static bool rtl_supports_eee(struct rtl8169_private *tp)
tp->mac_version != RTL_GIGA_MAC_VER_39;
}
+static void rtl_read_mac_from_reg(struct rtl8169_private *tp, u8 *mac, int reg)
+{
+ int i;
+
+ for (i = 0; i < ETH_ALEN; i++)
+ mac[i] = RTL_R8(tp, reg + i);
+}
+
struct rtl_cond {
bool (*check)(struct rtl8169_private *);
const char *msg;
@@ -1313,7 +1322,12 @@ static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
}
-static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
+static u32 rtl_get_events(struct rtl8169_private *tp)
+{
+ return RTL_R16(tp, IntrStatus);
+}
+
+static void rtl_ack_events(struct rtl8169_private *tp, u32 bits)
{
RTL_W16(tp, IntrStatus, bits);
}
@@ -1337,7 +1351,7 @@ static void rtl_irq_enable(struct rtl8169_private *tp)
static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
{
rtl_irq_disable(tp);
- rtl_ack_events(tp, 0xffff);
+ rtl_ack_events(tp, 0xffffffff);
/* PCI commit */
RTL_R8(tp, ChipCmd);
}
@@ -5073,6 +5087,9 @@ static void rtl_hw_start_8168(struct rtl8169_private *tp)
RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
rtl_hw_config(tp);
+
+ /* disable interrupt coalescing */
+ RTL_W16(tp, IntrMitigate, 0x0000);
}
static void rtl_hw_start_8169(struct rtl8169_private *tp)
@@ -5096,6 +5113,9 @@ static void rtl_hw_start_8169(struct rtl8169_private *tp)
rtl8169_set_magic_reg(tp, tp->mac_version);
RTL_W32(tp, RxMissed, 0);
+
+ /* disable interrupt coalescing */
+ RTL_W16(tp, IntrMitigate, 0x0000);
}
static void rtl_hw_start(struct rtl8169_private *tp)
@@ -5114,10 +5134,8 @@ static void rtl_hw_start(struct rtl8169_private *tp)
rtl_set_rx_tx_desc_registers(tp);
rtl_lock_config_regs(tp);
- /* disable interrupt coalescing */
- RTL_W16(tp, IntrMitigate, 0x0000);
/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
- RTL_R8(tp, IntrMask);
+ RTL_R8(tp, ChipCmd);
RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_init_rxcfg(tp);
rtl_set_tx_config_registers(tp);
@@ -5695,7 +5713,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
tp->TxDescArray + entry);
- if (status & LastFrag) {
+ if (tx_skb->skb) {
pkts_compl++;
bytes_compl += tx_skb->skb->len;
napi_consume_skb(tx_skb->skb, budget);
@@ -5854,9 +5872,10 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
{
struct rtl8169_private *tp = dev_instance;
- u16 status = RTL_R16(tp, IntrStatus);
+ u32 status = rtl_get_events(tp);
- if (!tp->irq_enabled || status == 0xffff || !(status & tp->irq_mask))
+ if (!tp->irq_enabled || (status & 0xffff) == 0xffff ||
+ !(status & tp->irq_mask))
return IRQ_NONE;
if (unlikely(status & SYSErr)) {
@@ -6623,7 +6642,7 @@ static void rtl_init_mac_address(struct rtl8169_private *tp)
{
struct net_device *dev = tp->dev;
u8 *mac_addr = dev->dev_addr;
- int rc, i;
+ int rc;
rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr);
if (!rc)
@@ -6633,8 +6652,7 @@ static void rtl_init_mac_address(struct rtl8169_private *tp)
if (is_valid_ether_addr(mac_addr))
goto done;
- for (i = 0; i < ETH_ALEN; i++)
- mac_addr[i] = RTL_R8(tp, MAC0 + i);
+ rtl_read_mac_from_reg(tp, mac_addr, MAC0);
if (is_valid_ether_addr(mac_addr))
goto done;
--
2.23.0
^ permalink raw reply related
* [PATCH net-next 0/4] r8169: add support for RTL8125
From: Heiner Kallweit @ 2019-08-27 18:40 UTC (permalink / raw)
To: Realtek linux nic maintainers, David Miller
Cc: netdev@vger.kernel.org, Chun-Hao Lin
This series adds support for the 2.5Gbps chip RTL8125. It can be found
on PCIe network cards, and on an increasing number of consumer gaming
mainboards. Series is partially based on the r8125 vendor driver.
Tested with a Delock 89531 PCIe card against a Netgear GS110MX
Multi-Gig switch.
Firmware isn't strictly needed, but on some systems there may be
compatibility issues w/o firmware. Firmware has been submitted to
linux-firmware.
Heiner Kallweit (4):
r8169: prepare for adding RTL8125 support
r8169: add support for RTL8125
r8169: add RTL8125 PHY initialization
r8169: add support for EEE on RTL8125
drivers/net/ethernet/realtek/Kconfig | 9 +-
drivers/net/ethernet/realtek/r8169_main.c | 464 ++++++++++++++++++++--
2 files changed, 443 insertions(+), 30 deletions(-)
--
2.23.0
^ permalink raw reply
* Re: [GIT] Networking
From: pr-tracker-bot @ 2019-08-27 18:35 UTC (permalink / raw)
To: David Miller; +Cc: torvalds, akpm, netdev, linux-kernel
In-Reply-To: <20190825.232902.493461685673378789.davem@davemloft.net>
The pull request you sent on Sun, 25 Aug 2019 23:29:02 -0700 (PDT):
> git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git refs/heads/master
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/452a04441b4d0d2d567e4128af58867739002640
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox