* [PATCH v2 4/6] crypto: virtio: convert to new crypto engine API
From: Corentin Labbe @ 2018-01-26 19:15 UTC (permalink / raw)
To: alexandre.torgue, arei.gonglei, corbet, davem, herbert, jasowang,
mcoquelin.stm32, mst, fabien.dessenne
Cc: linux-doc, linux-kernel, virtualization, linux-sunxi,
Corentin Labbe, linux-crypto, linux-arm-kernel
In-Reply-To: <20180126191534.17569-1-clabbe.montjoie@gmail.com>
This patch convert the driver to the new crypto engine API.
Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
---
drivers/crypto/virtio/virtio_crypto_algs.c | 16 ++++++++++------
drivers/crypto/virtio/virtio_crypto_common.h | 3 +--
drivers/crypto/virtio/virtio_crypto_core.c | 3 ---
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c
index abe8c15450df..ba190cfa7aa1 100644
--- a/drivers/crypto/virtio/virtio_crypto_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_algs.c
@@ -29,6 +29,7 @@
struct virtio_crypto_ablkcipher_ctx {
+ struct crypto_engine_ctx enginectx;
struct virtio_crypto *vcrypto;
struct crypto_tfm *tfm;
@@ -491,7 +492,7 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req)
vc_sym_req->ablkcipher_req = req;
vc_sym_req->encrypt = true;
- return crypto_transfer_cipher_request_to_engine(data_vq->engine, req);
+ return crypto_transfer_ablkcipher_request_to_engine(data_vq->engine, req);
}
static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
@@ -511,7 +512,7 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
vc_sym_req->ablkcipher_req = req;
vc_sym_req->encrypt = false;
- return crypto_transfer_cipher_request_to_engine(data_vq->engine, req);
+ return crypto_transfer_ablkcipher_request_to_engine(data_vq->engine, req);
}
static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm)
@@ -521,6 +522,9 @@ static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm)
tfm->crt_ablkcipher.reqsize = sizeof(struct virtio_crypto_sym_request);
ctx->tfm = tfm;
+ ctx->enginectx.op.do_one_request = virtio_crypto_ablkcipher_crypt_req;
+ ctx->enginectx.op.prepare_request = NULL;
+ ctx->enginectx.op.unprepare_request = NULL;
return 0;
}
@@ -538,9 +542,9 @@ static void virtio_crypto_ablkcipher_exit(struct crypto_tfm *tfm)
}
int virtio_crypto_ablkcipher_crypt_req(
- struct crypto_engine *engine,
- struct ablkcipher_request *req)
+ struct crypto_engine *engine, void *vreq)
{
+ struct ablkcipher_request *req = container_of(vreq, struct ablkcipher_request, base);
struct virtio_crypto_sym_request *vc_sym_req =
ablkcipher_request_ctx(req);
struct virtio_crypto_request *vc_req = &vc_sym_req->base;
@@ -561,8 +565,8 @@ static void virtio_crypto_ablkcipher_finalize_req(
struct ablkcipher_request *req,
int err)
{
- crypto_finalize_cipher_request(vc_sym_req->base.dataq->engine,
- req, err);
+ crypto_finalize_ablkcipher_request(vc_sym_req->base.dataq->engine,
+ req, err);
kzfree(vc_sym_req->iv);
virtcrypto_clear_request(&vc_sym_req->base);
}
diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h
index e976539a05d9..72621bd67211 100644
--- a/drivers/crypto/virtio/virtio_crypto_common.h
+++ b/drivers/crypto/virtio/virtio_crypto_common.h
@@ -107,8 +107,7 @@ struct virtio_crypto *virtcrypto_get_dev_node(int node);
int virtcrypto_dev_start(struct virtio_crypto *vcrypto);
void virtcrypto_dev_stop(struct virtio_crypto *vcrypto);
int virtio_crypto_ablkcipher_crypt_req(
- struct crypto_engine *engine,
- struct ablkcipher_request *req);
+ struct crypto_engine *engine, void *vreq);
void
virtcrypto_clear_request(struct virtio_crypto_request *vc_req);
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index ff1410a32c2b..83326986c113 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -111,9 +111,6 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi)
ret = -ENOMEM;
goto err_engine;
}
-
- vi->data_vq[i].engine->cipher_one_request =
- virtio_crypto_ablkcipher_crypt_req;
}
kfree(names);
--
2.13.6
^ permalink raw reply related
* [PATCH v2 3/6] crypto: omap: convert to new crypto engine API
From: Corentin Labbe @ 2018-01-26 19:15 UTC (permalink / raw)
To: alexandre.torgue, arei.gonglei, corbet, davem, herbert, jasowang,
mcoquelin.stm32, mst, fabien.dessenne
Cc: linux-doc, linux-kernel, virtualization, linux-sunxi,
Corentin Labbe, linux-crypto, linux-arm-kernel
In-Reply-To: <20180126191534.17569-1-clabbe.montjoie@gmail.com>
This patch convert the driver to the new crypto engine API.
Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
---
drivers/crypto/omap-aes.c | 21 +++++++++++++++------
drivers/crypto/omap-aes.h | 3 +++
drivers/crypto/omap-des.c | 24 ++++++++++++++++++------
3 files changed, 36 insertions(+), 12 deletions(-)
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index fbec0a2e76dd..5bd383ed3dec 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -388,7 +388,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
pr_debug("err: %d\n", err);
- crypto_finalize_cipher_request(dd->engine, req, err);
+ crypto_finalize_ablkcipher_request(dd->engine, req, err);
pm_runtime_mark_last_busy(dd->dev);
pm_runtime_put_autosuspend(dd->dev);
@@ -408,14 +408,15 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
struct ablkcipher_request *req)
{
if (req)
- return crypto_transfer_cipher_request_to_engine(dd->engine, req);
+ return crypto_transfer_ablkcipher_request_to_engine(dd->engine, req);
return 0;
}
static int omap_aes_prepare_req(struct crypto_engine *engine,
- struct ablkcipher_request *req)
+ void *areq)
{
+ struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
crypto_ablkcipher_reqtfm(req));
struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
@@ -468,8 +469,9 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
}
static int omap_aes_crypt_req(struct crypto_engine *engine,
- struct ablkcipher_request *req)
+ void *areq)
{
+ struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
struct omap_aes_dev *dd = rctx->dd;
@@ -601,6 +603,11 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
return omap_aes_crypt(req, FLAGS_CTR);
}
+static int omap_aes_prepare_req(struct crypto_engine *engine,
+ void *req);
+static int omap_aes_crypt_req(struct crypto_engine *engine,
+ void *req);
+
static int omap_aes_cra_init(struct crypto_tfm *tfm)
{
const char *name = crypto_tfm_alg_name(tfm);
@@ -616,6 +623,10 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
+ ctx->enginectx.op.prepare_request = omap_aes_prepare_req;
+ ctx->enginectx.op.unprepare_request = NULL;
+ ctx->enginectx.op.do_one_request = omap_aes_crypt_req;
+
return 0;
}
@@ -1119,8 +1130,6 @@ static int omap_aes_probe(struct platform_device *pdev)
goto err_engine;
}
- dd->engine->prepare_cipher_request = omap_aes_prepare_req;
- dd->engine->cipher_one_request = omap_aes_crypt_req;
err = crypto_engine_start(dd->engine);
if (err)
goto err_engine;
diff --git a/drivers/crypto/omap-aes.h b/drivers/crypto/omap-aes.h
index 8906342e2b9a..fc3b46a85809 100644
--- a/drivers/crypto/omap-aes.h
+++ b/drivers/crypto/omap-aes.h
@@ -13,6 +13,8 @@
#ifndef __OMAP_AES_H__
#define __OMAP_AES_H__
+#include <crypto/engine.h>
+
#define DST_MAXBURST 4
#define DMA_MIN (DST_MAXBURST * sizeof(u32))
@@ -95,6 +97,7 @@ struct omap_aes_gcm_result {
};
struct omap_aes_ctx {
+ struct crypto_engine_ctx enginectx;
int keylen;
u32 key[AES_KEYSIZE_256 / sizeof(u32)];
u8 nonce[4];
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index ebc5c0f11f03..eb95b0d7f184 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -86,6 +86,7 @@
#define FLAGS_OUT_DATA_ST_SHIFT 10
struct omap_des_ctx {
+ struct crypto_engine_ctx enginectx;
struct omap_des_dev *dd;
int keylen;
@@ -498,7 +499,7 @@ static void omap_des_finish_req(struct omap_des_dev *dd, int err)
pr_debug("err: %d\n", err);
- crypto_finalize_cipher_request(dd->engine, req, err);
+ crypto_finalize_ablkcipher_request(dd->engine, req, err);
pm_runtime_mark_last_busy(dd->dev);
pm_runtime_put_autosuspend(dd->dev);
@@ -520,14 +521,15 @@ static int omap_des_handle_queue(struct omap_des_dev *dd,
struct ablkcipher_request *req)
{
if (req)
- return crypto_transfer_cipher_request_to_engine(dd->engine, req);
+ return crypto_transfer_ablkcipher_request_to_engine(dd->engine, req);
return 0;
}
static int omap_des_prepare_req(struct crypto_engine *engine,
- struct ablkcipher_request *req)
+ void *areq)
{
+ struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(
crypto_ablkcipher_reqtfm(req));
struct omap_des_dev *dd = omap_des_find_dev(ctx);
@@ -582,8 +584,9 @@ static int omap_des_prepare_req(struct crypto_engine *engine,
}
static int omap_des_crypt_req(struct crypto_engine *engine,
- struct ablkcipher_request *req)
+ void *areq)
{
+ struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(
crypto_ablkcipher_reqtfm(req));
struct omap_des_dev *dd = omap_des_find_dev(ctx);
@@ -695,12 +698,23 @@ static int omap_des_cbc_decrypt(struct ablkcipher_request *req)
return omap_des_crypt(req, FLAGS_CBC);
}
+static int omap_des_prepare_req(struct crypto_engine *engine,
+ void *areq);
+static int omap_des_crypt_req(struct crypto_engine *engine,
+ void *areq);
+
static int omap_des_cra_init(struct crypto_tfm *tfm)
{
+ struct omap_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
pr_debug("enter\n");
tfm->crt_ablkcipher.reqsize = sizeof(struct omap_des_reqctx);
+ ctx->enginectx.op.prepare_request = omap_des_prepare_req;
+ ctx->enginectx.op.unprepare_request = NULL;
+ ctx->enginectx.op.do_one_request = omap_des_crypt_req;
+
return 0;
}
@@ -1046,8 +1060,6 @@ static int omap_des_probe(struct platform_device *pdev)
goto err_engine;
}
- dd->engine->prepare_cipher_request = omap_des_prepare_req;
- dd->engine->cipher_one_request = omap_des_crypt_req;
err = crypto_engine_start(dd->engine);
if (err)
goto err_engine;
--
2.13.6
^ permalink raw reply related
* [PATCH v2 2/6] crypto: engine - Permit to enqueue all async requests
From: Corentin Labbe @ 2018-01-26 19:15 UTC (permalink / raw)
To: alexandre.torgue, arei.gonglei, corbet, davem, herbert, jasowang,
mcoquelin.stm32, mst, fabien.dessenne
Cc: linux-doc, linux-kernel, virtualization, linux-sunxi,
Corentin Labbe, linux-crypto, linux-arm-kernel
In-Reply-To: <20180126191534.17569-1-clabbe.montjoie@gmail.com>
The crypto engine could actually only enqueue hash and ablkcipher request.
This patch permit it to enqueue any type of crypto_async_request.
Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Tested-by: Fabien Dessenne <fabien.dessenne@st.com>
---
crypto/crypto_engine.c | 301 ++++++++++++++++++++++++++----------------------
include/crypto/engine.h | 68 ++++++-----
2 files changed, 203 insertions(+), 166 deletions(-)
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index 61e7c4e02fd2..992e8d8dcdd9 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -15,13 +15,50 @@
#include <linux/err.h>
#include <linux/delay.h>
#include <crypto/engine.h>
-#include <crypto/internal/hash.h>
#include <uapi/linux/sched/types.h>
#include "internal.h"
#define CRYPTO_ENGINE_MAX_QLEN 10
/**
+ * crypto_finalize_request - finalize one request if the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+static void crypto_finalize_request(struct crypto_engine *engine,
+ struct crypto_async_request *req, int err)
+{
+ unsigned long flags;
+ bool finalize_cur_req = false;
+ int ret;
+ struct crypto_engine_ctx *enginectx;
+
+ spin_lock_irqsave(&engine->queue_lock, flags);
+ if (engine->cur_req == req)
+ finalize_cur_req = true;
+ spin_unlock_irqrestore(&engine->queue_lock, flags);
+
+ if (finalize_cur_req) {
+ enginectx = crypto_tfm_ctx(req->tfm);
+ if (engine->cur_req_prepared &&
+ enginectx->op.unprepare_request) {
+ ret = enginectx->op.unprepare_request(engine, req);
+ if (ret)
+ dev_err(engine->dev, "failed to unprepare request\n");
+ }
+ spin_lock_irqsave(&engine->queue_lock, flags);
+ engine->cur_req = NULL;
+ engine->cur_req_prepared = false;
+ spin_unlock_irqrestore(&engine->queue_lock, flags);
+ }
+
+ req->complete(req, err);
+
+ kthread_queue_work(engine->kworker, &engine->pump_requests);
+}
+
+/**
* crypto_pump_requests - dequeue one request from engine queue to process
* @engine: the hardware engine
* @in_kthread: true if we are in the context of the request pump thread
@@ -34,11 +71,10 @@ static void crypto_pump_requests(struct crypto_engine *engine,
bool in_kthread)
{
struct crypto_async_request *async_req, *backlog;
- struct ahash_request *hreq;
- struct ablkcipher_request *breq;
unsigned long flags;
bool was_busy = false;
- int ret, rtype;
+ int ret;
+ struct crypto_engine_ctx *enginectx;
spin_lock_irqsave(&engine->queue_lock, flags);
@@ -94,7 +130,6 @@ static void crypto_pump_requests(struct crypto_engine *engine,
spin_unlock_irqrestore(&engine->queue_lock, flags);
- rtype = crypto_tfm_alg_type(engine->cur_req->tfm);
/* Until here we get the request need to be encrypted successfully */
if (!was_busy && engine->prepare_crypt_hardware) {
ret = engine->prepare_crypt_hardware(engine);
@@ -104,57 +139,31 @@ static void crypto_pump_requests(struct crypto_engine *engine,
}
}
- switch (rtype) {
- case CRYPTO_ALG_TYPE_AHASH:
- hreq = ahash_request_cast(engine->cur_req);
- if (engine->prepare_hash_request) {
- ret = engine->prepare_hash_request(engine, hreq);
- if (ret) {
- dev_err(engine->dev, "failed to prepare request: %d\n",
- ret);
- goto req_err;
- }
- engine->cur_req_prepared = true;
- }
- ret = engine->hash_one_request(engine, hreq);
- if (ret) {
- dev_err(engine->dev, "failed to hash one request from queue\n");
- goto req_err;
- }
- return;
- case CRYPTO_ALG_TYPE_ABLKCIPHER:
- breq = ablkcipher_request_cast(engine->cur_req);
- if (engine->prepare_cipher_request) {
- ret = engine->prepare_cipher_request(engine, breq);
- if (ret) {
- dev_err(engine->dev, "failed to prepare request: %d\n",
- ret);
- goto req_err;
- }
- engine->cur_req_prepared = true;
- }
- ret = engine->cipher_one_request(engine, breq);
+ enginectx = crypto_tfm_ctx(async_req->tfm);
+
+ if (enginectx->op.prepare_request) {
+ ret = enginectx->op.prepare_request(engine, async_req);
if (ret) {
- dev_err(engine->dev, "failed to cipher one request from queue\n");
+ dev_err(engine->dev, "failed to prepare request: %d\n",
+ ret);
goto req_err;
}
- return;
- default:
- dev_err(engine->dev, "failed to prepare request of unknown type\n");
- return;
+ engine->cur_req_prepared = true;
+ }
+ if (!enginectx->op.do_one_request) {
+ dev_err(engine->dev, "failed to do request\n");
+ ret = -EINVAL;
+ goto req_err;
}
+ ret = enginectx->op.do_one_request(engine, async_req);
+ if (ret) {
+ dev_err(engine->dev, "Failed to do one request from queue: %d\n", ret);
+ goto req_err;
+ }
+ return;
req_err:
- switch (rtype) {
- case CRYPTO_ALG_TYPE_AHASH:
- hreq = ahash_request_cast(engine->cur_req);
- crypto_finalize_hash_request(engine, hreq, ret);
- break;
- case CRYPTO_ALG_TYPE_ABLKCIPHER:
- breq = ablkcipher_request_cast(engine->cur_req);
- crypto_finalize_cipher_request(engine, breq, ret);
- break;
- }
+ crypto_finalize_request(engine, async_req, ret);
return;
out:
@@ -170,13 +179,12 @@ static void crypto_pump_work(struct kthread_work *work)
}
/**
- * crypto_transfer_cipher_request - transfer the new request into the
- * enginequeue
+ * crypto_transfer_request - transfer the new request into the engine queue
* @engine: the hardware engine
* @req: the request need to be listed into the engine queue
*/
-int crypto_transfer_cipher_request(struct crypto_engine *engine,
- struct ablkcipher_request *req,
+static int crypto_transfer_request(struct crypto_engine *engine,
+ struct crypto_async_request *req,
bool need_pump)
{
unsigned long flags;
@@ -189,7 +197,7 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
return -ESHUTDOWN;
}
- ret = ablkcipher_enqueue_request(&engine->queue, req);
+ ret = crypto_enqueue_request(&engine->queue, req);
if (!engine->busy && need_pump)
kthread_queue_work(engine->kworker, &engine->pump_requests);
@@ -197,102 +205,131 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
spin_unlock_irqrestore(&engine->queue_lock, flags);
return ret;
}
-EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request);
/**
- * crypto_transfer_cipher_request_to_engine - transfer one request to list
+ * crypto_transfer_request_to_engine - transfer one request to list
* into the engine queue
* @engine: the hardware engine
* @req: the request need to be listed into the engine queue
*/
-int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
- struct ablkcipher_request *req)
+static int crypto_transfer_request_to_engine(struct crypto_engine *engine,
+ struct crypto_async_request *req)
{
- return crypto_transfer_cipher_request(engine, req, true);
+ return crypto_transfer_request(engine, req, true);
}
-EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request_to_engine);
/**
- * crypto_transfer_hash_request - transfer the new request into the
- * enginequeue
+ * crypto_transfer_ablkcipher_request_to_engine - transfer one ablkcipher_request
+ * to list into the engine queue
* @engine: the hardware engine
* @req: the request need to be listed into the engine queue
+ * TODO: Remove this function when skcipher conversion is finished
*/
-int crypto_transfer_hash_request(struct crypto_engine *engine,
- struct ahash_request *req, bool need_pump)
+int crypto_transfer_ablkcipher_request_to_engine(struct crypto_engine *engine,
+ struct ablkcipher_request *req)
{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&engine->queue_lock, flags);
-
- if (!engine->running) {
- spin_unlock_irqrestore(&engine->queue_lock, flags);
- return -ESHUTDOWN;
- }
-
- ret = ahash_enqueue_request(&engine->queue, req);
+ return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_ablkcipher_request_to_engine);
- if (!engine->busy && need_pump)
- kthread_queue_work(engine->kworker, &engine->pump_requests);
+/**
+ * crypto_transfer_aead_request_to_engine - transfer one aead_request
+ * to list into the engine queue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_aead_request_to_engine(struct crypto_engine *engine,
+ struct aead_request *req)
+{
+ return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_aead_request_to_engine);
- spin_unlock_irqrestore(&engine->queue_lock, flags);
- return ret;
+/**
+ * crypto_transfer_akcipher_request_to_engine - transfer one akcipher_request
+ * to list into the engine queue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine,
+ struct akcipher_request *req)
+{
+ return crypto_transfer_request_to_engine(engine, &req->base);
}
-EXPORT_SYMBOL_GPL(crypto_transfer_hash_request);
+EXPORT_SYMBOL_GPL(crypto_transfer_akcipher_request_to_engine);
/**
- * crypto_transfer_hash_request_to_engine - transfer one request to list
- * into the engine queue
+ * crypto_transfer_hash_request_to_engine - transfer one ahash_request
+ * to list into the engine queue
* @engine: the hardware engine
* @req: the request need to be listed into the engine queue
*/
int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
struct ahash_request *req)
{
- return crypto_transfer_hash_request(engine, req, true);
+ return crypto_transfer_request_to_engine(engine, &req->base);
}
EXPORT_SYMBOL_GPL(crypto_transfer_hash_request_to_engine);
/**
- * crypto_finalize_cipher_request - finalize one request if the request is done
+ * crypto_transfer_skcipher_request_to_engine - transfer one skcipher_request
+ * to list into the engine queue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine,
+ struct skcipher_request *req)
+{
+ return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_skcipher_request_to_engine);
+
+/**
+ * crypto_finalize_ablkcipher_request - finalize one ablkcipher_request if
+ * the request is done
* @engine: the hardware engine
* @req: the request need to be finalized
* @err: error number
+ * TODO: Remove this function when skcipher conversion is finished
*/
-void crypto_finalize_cipher_request(struct crypto_engine *engine,
- struct ablkcipher_request *req, int err)
+void crypto_finalize_ablkcipher_request(struct crypto_engine *engine,
+ struct ablkcipher_request *req, int err)
{
- unsigned long flags;
- bool finalize_cur_req = false;
- int ret;
-
- spin_lock_irqsave(&engine->queue_lock, flags);
- if (engine->cur_req == &req->base)
- finalize_cur_req = true;
- spin_unlock_irqrestore(&engine->queue_lock, flags);
-
- if (finalize_cur_req) {
- if (engine->cur_req_prepared &&
- engine->unprepare_cipher_request) {
- ret = engine->unprepare_cipher_request(engine, req);
- if (ret)
- dev_err(engine->dev, "failed to unprepare request\n");
- }
- spin_lock_irqsave(&engine->queue_lock, flags);
- engine->cur_req = NULL;
- engine->cur_req_prepared = false;
- spin_unlock_irqrestore(&engine->queue_lock, flags);
- }
+ return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_ablkcipher_request);
- req->base.complete(&req->base, err);
+/**
+ * crypto_finalize_aead_request - finalize one aead_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+void crypto_finalize_aead_request(struct crypto_engine *engine,
+ struct aead_request *req, int err)
+{
+ return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_aead_request);
- kthread_queue_work(engine->kworker, &engine->pump_requests);
+/**
+ * crypto_finalize_akcipher_request - finalize one akcipher_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+void crypto_finalize_akcipher_request(struct crypto_engine *engine,
+ struct akcipher_request *req, int err)
+{
+ return crypto_finalize_request(engine, &req->base, err);
}
-EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
+EXPORT_SYMBOL_GPL(crypto_finalize_akcipher_request);
/**
- * crypto_finalize_hash_request - finalize one request if the request is done
+ * crypto_finalize_hash_request - finalize one ahash_request if
+ * the request is done
* @engine: the hardware engine
* @req: the request need to be finalized
* @err: error number
@@ -300,35 +337,25 @@ EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
void crypto_finalize_hash_request(struct crypto_engine *engine,
struct ahash_request *req, int err)
{
- unsigned long flags;
- bool finalize_cur_req = false;
- int ret;
-
- spin_lock_irqsave(&engine->queue_lock, flags);
- if (engine->cur_req == &req->base)
- finalize_cur_req = true;
- spin_unlock_irqrestore(&engine->queue_lock, flags);
-
- if (finalize_cur_req) {
- if (engine->cur_req_prepared &&
- engine->unprepare_hash_request) {
- ret = engine->unprepare_hash_request(engine, req);
- if (ret)
- dev_err(engine->dev, "failed to unprepare request\n");
- }
- spin_lock_irqsave(&engine->queue_lock, flags);
- engine->cur_req = NULL;
- engine->cur_req_prepared = false;
- spin_unlock_irqrestore(&engine->queue_lock, flags);
- }
-
- req->base.complete(&req->base, err);
-
- kthread_queue_work(engine->kworker, &engine->pump_requests);
+ return crypto_finalize_request(engine, &req->base, err);
}
EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
/**
+ * crypto_finalize_skcipher_request - finalize one skcipher_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+void crypto_finalize_skcipher_request(struct crypto_engine *engine,
+ struct skcipher_request *req, int err)
+{
+ return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_skcipher_request);
+
+/**
* crypto_engine_start - start the hardware engine
* @engine: the hardware engine need to be started
*
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index dd04c1699b51..1cbec29af3d6 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -17,7 +17,10 @@
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <crypto/algapi.h>
+#include <crypto/aead.h>
+#include <crypto/akcipher.h>
#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#define ENGINE_NAME_LEN 30
/*
@@ -37,12 +40,6 @@
* @unprepare_crypt_hardware: there are currently no more requests on the
* queue so the subsystem notifies the driver that it may relax the
* hardware by issuing this call
- * @prepare_cipher_request: do some prepare if need before handle the current request
- * @unprepare_cipher_request: undo any work done by prepare_cipher_request()
- * @cipher_one_request: do encryption for current request
- * @prepare_hash_request: do some prepare if need before handle the current request
- * @unprepare_hash_request: undo any work done by prepare_hash_request()
- * @hash_one_request: do hash for current request
* @kworker: kthread worker struct for request pump
* @pump_requests: work struct for scheduling work to the request pump
* @priv_data: the engine private data
@@ -65,19 +62,6 @@ struct crypto_engine {
int (*prepare_crypt_hardware)(struct crypto_engine *engine);
int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
- int (*prepare_cipher_request)(struct crypto_engine *engine,
- struct ablkcipher_request *req);
- int (*unprepare_cipher_request)(struct crypto_engine *engine,
- struct ablkcipher_request *req);
- int (*prepare_hash_request)(struct crypto_engine *engine,
- struct ahash_request *req);
- int (*unprepare_hash_request)(struct crypto_engine *engine,
- struct ahash_request *req);
- int (*cipher_one_request)(struct crypto_engine *engine,
- struct ablkcipher_request *req);
- int (*hash_one_request)(struct crypto_engine *engine,
- struct ahash_request *req);
-
struct kthread_worker *kworker;
struct kthread_work pump_requests;
@@ -85,19 +69,45 @@ struct crypto_engine {
struct crypto_async_request *cur_req;
};
-int crypto_transfer_cipher_request(struct crypto_engine *engine,
- struct ablkcipher_request *req,
- bool need_pump);
-int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
- struct ablkcipher_request *req);
-int crypto_transfer_hash_request(struct crypto_engine *engine,
- struct ahash_request *req, bool need_pump);
+/*
+ * struct crypto_engine_op - crypto hardware engine operations
+ * @prepare__request: do some prepare if need before handle the current request
+ * @unprepare_request: undo any work done by prepare_request()
+ * @do_one_request: do encryption for current request
+ */
+struct crypto_engine_op {
+ int (*prepare_request)(struct crypto_engine *engine,
+ void *areq);
+ int (*unprepare_request)(struct crypto_engine *engine,
+ void *areq);
+ int (*do_one_request)(struct crypto_engine *engine,
+ void *areq);
+};
+
+struct crypto_engine_ctx {
+ struct crypto_engine_op op;
+};
+
+int crypto_transfer_ablkcipher_request_to_engine(struct crypto_engine *engine,
+ struct ablkcipher_request *req);
+int crypto_transfer_aead_request_to_engine(struct crypto_engine *engine,
+ struct aead_request *req);
+int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine,
+ struct akcipher_request *req);
int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
- struct ahash_request *req);
-void crypto_finalize_cipher_request(struct crypto_engine *engine,
- struct ablkcipher_request *req, int err);
+ struct ahash_request *req);
+int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine,
+ struct skcipher_request *req);
+void crypto_finalize_ablkcipher_request(struct crypto_engine *engine,
+ struct ablkcipher_request *req, int err);
+void crypto_finalize_aead_request(struct crypto_engine *engine,
+ struct aead_request *req, int err);
+void crypto_finalize_akcipher_request(struct crypto_engine *engine,
+ struct akcipher_request *req, int err);
void crypto_finalize_hash_request(struct crypto_engine *engine,
struct ahash_request *req, int err);
+void crypto_finalize_skcipher_request(struct crypto_engine *engine,
+ struct skcipher_request *req, int err);
int crypto_engine_start(struct crypto_engine *engine);
int crypto_engine_stop(struct crypto_engine *engine);
struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt);
--
2.13.6
^ permalink raw reply related
* [PATCH v2 1/6] Documentation: crypto: document crypto engine API
From: Corentin Labbe @ 2018-01-26 19:15 UTC (permalink / raw)
To: alexandre.torgue, arei.gonglei, corbet, davem, herbert, jasowang,
mcoquelin.stm32, mst, fabien.dessenne
Cc: linux-doc, linux-kernel, virtualization, linux-sunxi,
Corentin Labbe, linux-crypto, linux-arm-kernel
In-Reply-To: <20180126191534.17569-1-clabbe.montjoie@gmail.com>
Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
---
Documentation/crypto/crypto_engine.rst | 48 ++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
create mode 100644 Documentation/crypto/crypto_engine.rst
diff --git a/Documentation/crypto/crypto_engine.rst b/Documentation/crypto/crypto_engine.rst
new file mode 100644
index 000000000000..8272ac92a14f
--- /dev/null
+++ b/Documentation/crypto/crypto_engine.rst
@@ -0,0 +1,48 @@
+=============
+CRYPTO ENGINE
+=============
+
+Overview
+--------
+The crypto engine API (CE), is a crypto queue manager.
+
+Requirement
+-----------
+You have to put at start of your tfm_ctx the struct crypto_engine_ctx
+struct your_tfm_ctx {
+ struct crypto_engine_ctx enginectx;
+ ...
+};
+Why: Since CE manage only crypto_async_request, it cannot know the underlying
+request_type and so have access only on the TFM.
+So using container_of for accessing __ctx is impossible.
+Furthermore, the crypto engine cannot know the "struct your_tfm_ctx",
+so it must assume that crypto_engine_ctx is at start of it.
+
+Order of operations
+-------------------
+You have to obtain a struct crypto_engine via crypto_engine_alloc_init().
+And start it via crypto_engine_start().
+
+Before transferring any request, you have to fill the enginectx.
+- prepare_request: (taking a function pointer) If you need to do some processing before doing the request
+- unprepare_request: (taking a function pointer) Undoing what's done in prepare_request
+- do_one_request: (taking a function pointer) Do encryption for current request
+
+Note: that those three functions get the crypto_async_request associated with the received request.
+So your need to get the original request via container_of(areq, struct yourrequesttype_request, base);
+
+When your driver receive a crypto_request, you have to transfer it to
+the cryptoengine via one of:
+- crypto_transfer_ablkcipher_request_to_engine()
+- crypto_transfer_aead_request_to_engine()
+- crypto_transfer_akcipher_request_to_engine()
+- crypto_transfer_hash_request_to_engine()
+- crypto_transfer_skcipher_request_to_engine()
+
+At the end of the request process, a call to one of the following function is needed:
+- crypto_finalize_ablkcipher_request
+- crypto_finalize_aead_request
+- crypto_finalize_akcipher_request
+- crypto_finalize_hash_request
+- crypto_finalize_skcipher_request
--
2.13.6
^ permalink raw reply related
* [PATCH v2 0/6] crypto: engine - Permit to enqueue all async requests
From: Corentin Labbe @ 2018-01-26 19:15 UTC (permalink / raw)
To: alexandre.torgue, arei.gonglei, corbet, davem, herbert, jasowang,
mcoquelin.stm32, mst, fabien.dessenne
Cc: linux-doc, linux-kernel, virtualization, linux-sunxi,
Corentin Labbe, linux-crypto, linux-arm-kernel
Hello
The current crypto_engine support only ahash and ablkcipher request.
My first patch which try to add skcipher was Nacked, it will add too many functions
and adding other algs(aead, asymetric_key) will make the situation worst.
This patchset remove all algs specific stuff and now only process generic crypto_async_request.
The requests handler function pointer are now moved out of struct engine and
are now stored directly in a crypto_engine_reqctx.
The original proposal of Herbert [1] cannot be done completly since the crypto_engine
could only dequeue crypto_async_request and it is impossible to access any request_ctx
without knowing the underlying request type.
So I do something near that was requested: adding crypto_engine_reqctx in TFM context.
Note that the current implementation expect that crypto_engine_reqctx
is the first member of the context.
The first patch is a try to document the crypto engine API.
The second patch convert the crypto engine with the new way,
while the following patchs convert the 4 existing users of crypto_engine.
Note that this split break bisection, so probably the final commit will be all merged.
Appart from virtio, all 4 latest patch were compile tested only.
But the crypto engine is tested with my new sun8i-ce driver.
Regards
[1] https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1474434.html
Changes since V1:
- renamed crypto_engine_reqctx to crypto_engine_ctx
- indentation fix in function parameter
- do not export crypto_transfer_request
- Add aead support
- crypto_finalize_request is now static
Changes since RFC:
- Added a documentation patch
- Added patch for stm32-cryp
- Changed parameter of all crypto_engine_op functions from
crypto_async_request to void*
- Reintroduced crypto_transfer_xxx_request_to_engine functions
Corentin Labbe (6):
Documentation: crypto: document crypto engine API
crypto: engine - Permit to enqueue all async requests
crypto: omap: convert to new crypto engine API
crypto: virtio: convert to new crypto engine API
crypto: stm32-hash: convert to the new crypto engine API
crypto: stm32-cryp: convert to the new crypto engine API
Documentation/crypto/crypto_engine.rst | 48 +++++
crypto/crypto_engine.c | 301 +++++++++++++++------------
drivers/crypto/omap-aes.c | 21 +-
drivers/crypto/omap-aes.h | 3 +
drivers/crypto/omap-des.c | 24 ++-
drivers/crypto/stm32/stm32-cryp.c | 29 ++-
drivers/crypto/stm32/stm32-hash.c | 20 +-
drivers/crypto/virtio/virtio_crypto_algs.c | 16 +-
drivers/crypto/virtio/virtio_crypto_common.h | 3 +-
drivers/crypto/virtio/virtio_crypto_core.c | 3 -
include/crypto/engine.h | 68 +++---
11 files changed, 332 insertions(+), 204 deletions(-)
create mode 100644 Documentation/crypto/crypto_engine.rst
--
2.13.6
^ permalink raw reply
* Re: [RFC PATCH net-next v2 2/2] virtio_net: Extend virtio to use VF datapath when available
From: Samudrala, Sridhar @ 2018-01-26 18:15 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: alexander.h.duyck, virtio-dev, kubakici, netdev, virtualization,
davem
In-Reply-To: <20180126185617-mutt-send-email-mst@kernel.org>
On 1/26/2018 8:58 AM, Michael S. Tsirkin wrote:
> On Thu, Jan 11, 2018 at 09:58:39PM -0800, Sridhar Samudrala wrote:
>> @@ -2859,6 +3123,42 @@ static struct virtio_driver virtio_net_driver = {
>> #endif
>> };
>>
>> +static int virtio_netdev_event(struct notifier_block *this,
>> + unsigned long event, void *ptr)
>> +{
>> + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
>> +
>> + /* Skip our own events */
>> + if (event_dev->netdev_ops == &virtnet_netdev)
>> + return NOTIFY_DONE;
>> +
>> + /* Avoid non-Ethernet type devices */
>> + if (event_dev->type != ARPHRD_ETHER)
>> + return NOTIFY_DONE;
>> +
>> + /* Avoid Vlan dev with same MAC registering as VF */
>> + if (is_vlan_dev(event_dev))
>> + return NOTIFY_DONE;
>> +
>> + /* Avoid Bonding master dev with same MAC registering as VF */
>> + if ((event_dev->priv_flags & IFF_BONDING) &&
>> + (event_dev->flags & IFF_MASTER))
>> + return NOTIFY_DONE;
>> +
>> + switch (event) {
>> + case NETDEV_REGISTER:
>> + return virtnet_register_vf(event_dev);
>> + case NETDEV_UNREGISTER:
>> + return virtnet_unregister_vf(event_dev);
>> + default:
>> + return NOTIFY_DONE;
>> + }
>> +}
>> +
>> +static struct notifier_block virtio_netdev_notifier = {
>> + .notifier_call = virtio_netdev_event,
>> +};
>> +
>> static __init int virtio_net_driver_init(void)
>> {
>> int ret;
>> @@ -2877,6 +3177,8 @@ static __init int virtio_net_driver_init(void)
>> ret = register_virtio_driver(&virtio_net_driver);
>> if (ret)
>> goto err_virtio;
>> +
>> + register_netdevice_notifier(&virtio_netdev_notifier);
>> return 0;
>> err_virtio:
>> cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
>> @@ -2889,6 +3191,7 @@ module_init(virtio_net_driver_init);
>>
>> static __exit void virtio_net_driver_exit(void)
>> {
>> + unregister_netdevice_notifier(&virtio_netdev_notifier);
>> unregister_virtio_driver(&virtio_net_driver);
>> cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
>> cpuhp_remove_multi_state(virtionet_online);
> I have a question here: what if PT device driver module loads
> and creates a device before virtio?
>
>
Initially i also had this question if we get NETDEV_REGISTER events for
netdevs
that are already present. But it looks like
register_netdevice_notifier() will cause
replay of all the registration and up events of existing devices.
/**
* register_netdevice_notifier - register a network notifier block
* @nb: notifier
*
* Register a notifier to be called when network device events occur.
* The notifier passed is linked into the kernel structures and must
* not be reused until it has been unregistered. A negative errno code
* is returned on a failure.
*
* When registered all registration and up events are replayed
* to the new notifier to allow device to have a race free
* view of the network device list.
*/
Thanks
Sridhar
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC PATCH net-next v2 2/2] virtio_net: Extend virtio to use VF datapath when available
From: Michael S. Tsirkin @ 2018-01-26 16:58 UTC (permalink / raw)
To: Sridhar Samudrala
Cc: alexander.h.duyck, virtio-dev, kubakici, netdev, virtualization,
davem
In-Reply-To: <1515736720-39368-3-git-send-email-sridhar.samudrala@intel.com>
On Thu, Jan 11, 2018 at 09:58:39PM -0800, Sridhar Samudrala wrote:
> @@ -2859,6 +3123,42 @@ static struct virtio_driver virtio_net_driver = {
> #endif
> };
>
> +static int virtio_netdev_event(struct notifier_block *this,
> + unsigned long event, void *ptr)
> +{
> + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
> +
> + /* Skip our own events */
> + if (event_dev->netdev_ops == &virtnet_netdev)
> + return NOTIFY_DONE;
> +
> + /* Avoid non-Ethernet type devices */
> + if (event_dev->type != ARPHRD_ETHER)
> + return NOTIFY_DONE;
> +
> + /* Avoid Vlan dev with same MAC registering as VF */
> + if (is_vlan_dev(event_dev))
> + return NOTIFY_DONE;
> +
> + /* Avoid Bonding master dev with same MAC registering as VF */
> + if ((event_dev->priv_flags & IFF_BONDING) &&
> + (event_dev->flags & IFF_MASTER))
> + return NOTIFY_DONE;
> +
> + switch (event) {
> + case NETDEV_REGISTER:
> + return virtnet_register_vf(event_dev);
> + case NETDEV_UNREGISTER:
> + return virtnet_unregister_vf(event_dev);
> + default:
> + return NOTIFY_DONE;
> + }
> +}
> +
> +static struct notifier_block virtio_netdev_notifier = {
> + .notifier_call = virtio_netdev_event,
> +};
> +
> static __init int virtio_net_driver_init(void)
> {
> int ret;
> @@ -2877,6 +3177,8 @@ static __init int virtio_net_driver_init(void)
> ret = register_virtio_driver(&virtio_net_driver);
> if (ret)
> goto err_virtio;
> +
> + register_netdevice_notifier(&virtio_netdev_notifier);
> return 0;
> err_virtio:
> cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
> @@ -2889,6 +3191,7 @@ module_init(virtio_net_driver_init);
>
> static __exit void virtio_net_driver_exit(void)
> {
> + unregister_netdevice_notifier(&virtio_netdev_notifier);
> unregister_virtio_driver(&virtio_net_driver);
> cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
> cpuhp_remove_multi_state(virtionet_online);
I have a question here: what if PT device driver module loads
and creates a device before virtio?
> --
> 2.14.3
^ permalink raw reply
* Re: [virtio-dev] Re: [RFC PATCH net-next v2 2/2] virtio_net: Extend virtio to use VF datapath when available
From: Samudrala, Sridhar @ 2018-01-26 16:51 UTC (permalink / raw)
To: Siwei Liu, Michael S. Tsirkin
Cc: Alexander Duyck, virtio-dev, Jakub Kicinski, Netdev,
virtualization, David Miller
In-Reply-To: <CADGSJ2308J=tb1B1zrPi0bFzqYSoLfJsRZxBg6wT+wuCfRO6vw@mail.gmail.com>
On 1/26/2018 12:14 AM, Siwei Liu wrote:
> On Tue, Jan 23, 2018 at 2:58 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> On Tue, Jan 23, 2018 at 12:24:47PM -0800, Siwei Liu wrote:
>>> On Mon, Jan 22, 2018 at 1:41 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>>>> On Mon, Jan 22, 2018 at 12:27:14PM -0800, Siwei Liu wrote:
>>>>> First off, as mentioned in another thread, the model of stacking up
>>>>> virt-bond functionality over virtio seems a wrong direction to me.
>>>>> Essentially the migration process would need to carry over all guest
>>>>> side configurations previously done on the VF/PT and get them moved to
>>>>> the new device being it virtio or VF/PT.
>>>> I might be wrong but I don't see why we should worry about this usecase.
>>>> Whoever has a bond configured already has working config for migration.
>>>> We are trying to help people who don't, not convert existig users.
>>> That has been placed in the view of cloud providers that the imported
>>> images from the store must be able to run unmodified thus no
>>> additional setup script is allowed (just as Stephen mentioned in
>>> another mail). Cloud users don't care about live migration themselves
>>> but the providers are required to implement such automation mechanism
>>> to make this process transparent if at all possible. The user does not
>>> care about the device underneath being VF or not, but they do care
>>> about consistency all across and the resulting performance
>>> acceleration in making VF the prefered datapath. It is not quite
>>> peculiar user cases but IMHO *any* approach proposed for live
>>> migration should be able to persist the state including network config
>>> e.g. as simple as MTU. Actually this requirement has nothing to do
>>> with virtio but our target users are live migration agnostic, being it
>>> tracking DMA through dirty pages, using virtio as the helper, or
>>> whatsoever, the goal of persisting configs across remains same.
>> So the patching being discussed here will mostly do exactly that if your
>> original config was simply a single virtio net device.
>>
> True, but I don't see the patch being discussed starts with good
> foundation of supporting the same for VF/PT device. That is the core
> of the issue.
>> What kind of configs do your users have right now?
> Any configs be it generic or driver specific that the VF/PT device
> supports and have been enabled/configured. General network configs
> (MAC, IP address, VLAN, MTU, iptables rules), ethtool settings
> (hardware offload, # of queues and ring entris, RSC options, rss
> rxfh-indir table, rx-flow-hash, et al) , bpf/XDP program being run, tc
> flower offload, just to name a few. As cloud providers we don't limit
> users from applying driver specific tuning to the NIC/VF, and
> sometimes this is essential to achieving best performance for their
> workload. We've seen cases like tuning coalescing parameters for
> getting low latency, changing rx-flow-hash function for better VXLAN
> throughput, or even adopting quite advanced NIC features such as flow
> director or cloud filter. We don't expect users to compromise even a
> little bit on these. That is once we turn on live migration for the VF
> or pass through devices in the VM, it all takes place under the hood,
> users (guest admins, applications) don't have to react upon it or even
> notice the change. I should note that the majority of live migrations
> take place between machines with completely identical hardware, it's
> more critical than necessary to keep the config as-is across the move,
> stealth while quiet.
This usecase is much more complicated and different than what this patch
is trying
to address. Also your usecase seems to be assuming that source and
destination
hosts are identical and have the same HW.
It makes it pretty hard to transparently migrate all these settings with
live
migration when we are looking at a solution that unplugs the VF
interface from
the host and the VF driver is unloaded.
> As you see generic bond or bridge cannot suffice the need. That's why
> we need a new customized virt bond driver, and tailor it for VM live
> migration specifically. Leveraging para-virtual e.g. virtio net device
> as the backup path is one thing, tracking through driver config
> changes in order to re-config as necessary is another. I would think
> without considering the latter, the proposal being discussed is rather
> incomplete, and remote to be useful in production.
>
>>
>>>>> Without the help of a new
>>>>> upper layer bond driver that enslaves virtio and VF/PT devices
>>>>> underneath, virtio will be overloaded with too much specifics being a
>>>>> VF/PT backup in the future.
>>>> So this paragraph already includes at least two conflicting
>>>> proposals. On the one hand you want a separate device for
>>>> the virtual bond, on the other you are saying a separate
>>>> driver.
>>> Just to be crystal clear: separate virtual bond device (netdev ops,
>>> not necessarily bus device) for VM migration specifically with a
>>> separate driver.
>> Okay, but note that any config someone had on a virtio device won't
>> propagate to that bond.
>>
>>>> Further, the reason to have a separate *driver* was that
>>>> some people wanted to share code with netvsc - and that
>>>> one does not create a separate device, which you can't
>>>> change without breaking existing configs.
>>> I'm not sure I understand this statement. netvsc is already another
>>> netdev being created than the enslaved VF netdev, why it bothers?
>> Because it shipped, so userspace ABI is frozen. You can't really add a
>> netdevice and enslave an existing one without a risk of breaking some
>> userspace configs.
>>
> I still don't understand this concern. Like said, before this patch
> becomes reality, users interact with raw VF interface all the time.
> Now this patch introduces a virtio net devive and enslave the VF.
> Users have to interact with two interfaces - IP address and friends
> configured on the VF will get lost and users have to reconfigure
> virtio all over again. But some other configs e.g. ethtool needs to
> remain on the VF. How does it guarantee existing configs won't broken?
> Appears to me this is nothing different than having both virtio and VF
> netdevs enslaved and users operates on the virt-bond interface
> directly.
Yes. This patch doesn't transparently provide live migration support to
existing
network configurations that only use a VF. In order to get live
migration support,
for a VF only image, the network configuration has to change.
It provides hypervisor controlled VF acceleration to existing virtio_net
based network
configurations in a transparent manner.
>
> One thing I'd like to point out is the configs are mostly done in the
> control plane. It's entirely possible to separate the data and control
> paths in the new virt-bond driver: in the data plane, it may bypass
> the virt-bond layer and quickly fall through to the data path of
> virtio or VF slave; while in the control plane, the virt-bond may
> disguise itself as the active slave, delegate the config changes to
> the real driver, relay and expose driver config/state to the user. By
> doing that the users and userspace applications just interact with one
> single interface, the same way they interacted with the VF interface
> as before. Users don't have to deal with the other two enslaved
> interfaces directly - those automatically enslaved devices should be
> made invisible from userspace applications and admins, and/or be
> masked out from regular access by existing kernel APIs.
>
> I don't find it a good reason to reject the idea if we can sort out
> ways not to break existing ABI or APIs.
>
>
>>> In
>>> the Azure case, the stock image to be imported does not bind to a
>>> specific driver but only MAC address.
>> I'll let netvsc developers decide this, on the surface I don't think
>> it's reasonable to assume everyone only binds to a MAC.
> Sure. The point I wanted to make was that cloud providers are super
> elastic in provisioning images - those driver or device specifics
> should have been dehydrated from the original images thus make it
> flexible enough to deploy to machines with vast varieties of hardware.
> Although it's not necessarily the case everyone binds to a MAC, it's
> worth taking a look at what the target users are doing and what the
> pain points really are and understand what could be done to solve core
> problems. Hyper-V netvsc can also benefit once moved to it, I'd
> believe.
>
>>
>>> And people just deal with the
>>> new virt-bond netdev rather than the underlying virtio and VF. And
>>> both these two underlying netdevs should be made invisible to prevent
>>> userspace script from getting them misconfigured IMHO.
>>>
>>> A separate driver was for code sharing for sure, only just netvsc but
>>> could be other para-virtual devices floating around: any PV can serve
>>> as the side channel and the backup path for VF/PT. Once we get the new
>>> driver working atop virtio we may define ops and/or protocol needed to
>>> talk to various other PV frontend that may implement the side channel
>>> of its own for datapath switching (e.g. virtio is one of them, Xen PV
>>> frontend can be another). I just don't like to limit the function to
>>> virtio only and we have to duplicate code then it starts to scatter
>>> around all over the places.
>>>
>>> I understand right now we start it as simple so it may just be fine
>>> that the initial development activities center around virtio. However,
>>> from cloud provider/vendor perspective I don't see the proposed scheme
>>> limits to virtio only. Any other PV driver which has the plan to
>>> support the same scheme can benefit. The point is that we shouldn't be
>>> limiting the scheme to virtio specifics so early which is hard to have
>>> it promoted to a common driver once we get there.
>> The whole idea has been floating around for years. It would always
>> get being drowned in this kind of "lets try to cover all use-cases"
>> discussions, and never make progress.
>> So let's see some working code merged. If it works fine for virtio
>> and turns out to be a good fit for netvsc, we can share code.
> I think we at least should start with a separate netdev other than
> virtio. That is what we may agree to have to do without comprise I'd
> hope.
I think the usecase that you are targeting does require a new para virt
bond driver
and a new type of netdevice.
For the usecase where the host is doing all the guest network
tuning/optimizations
and the VM is not expected to do any tuning/optimizations on the VF
driver directly,
i think the current patch that follows the netvsc model of 2
netdevs(virtio and vf) should
work fine.
>>
>>>> So some people want a fully userspace-configurable switchdev, and that
>>>> already exists at some level, and maybe it makes sense to add more
>>>> features for performance.
>>>>
>>>> But the point was that some host configurations are very simple,
>>>> and it probably makes sense to pass this information to the guest
>>>> and have guest act on it directly. Let's not conflate the two.
>>> It may be fine to push some of the configurations from host but that
>>> perhaps doesn't cover all the cases: how is it possible for the host
>>> to save all network states and configs done by the guest before
>>> migration. Some of the configs might come from future guest which is
>>> unknown to host. Anyhow the bottom line is that the guest must be able
>>> to act on those configuration request changes automatically without
>>> involving users intervention.
>>>
>>> Regards,
>>> -Siwei
>> All use-cases are *already* covered by existing kernel APIs. Just use a
>> bond, or a bridge, or whatever. It's just that they are so generic and
>> hard to use, that userspace to do it never surfaced.
> As mentioned earlier, for which I cannot stress enough, the existing
> generic bond or bridge doesn't work. We need a new net device that
> works for live migration specifically and fits it well.
>
>> So I am interested in some code that handles some simple use-cases
>> in the kernel, with a simple kernel API.
> It should be fine, I like simple stuffs too and wouldn't like to make
> complications. The concept of hiding auto-managed interfaces is not
> new and has even been implemented by other operating systems already.
> Not sure if that is your compatibility concern. We start with simple
> for sure, but simple != in-expandable then make potential users
> impossible to use at all.
>
>
> Thanks,
> -Siwei
>
>>>> --
>>>> MST
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH v24 1/2] mm: support reporting free page blocks
From: Michael S. Tsirkin @ 2018-01-26 15:00 UTC (permalink / raw)
To: Wei Wang
Cc: yang.zhang.wz, virtio-dev, riel, quan.xu0, kvm, nilal,
liliang.opensource, linux-kernel, mhocko, linux-mm, pbonzini,
akpm, virtualization
In-Reply-To: <5A6AA08B.2080508@intel.com>
On Fri, Jan 26, 2018 at 11:29:15AM +0800, Wei Wang wrote:
> On 01/25/2018 09:41 PM, Michael S. Tsirkin wrote:
> > On Wed, Jan 24, 2018 at 06:42:41PM +0800, Wei Wang wrote:
> > > This patch adds support to walk through the free page blocks in the
> > > system and report them via a callback function. Some page blocks may
> > > leave the free list after zone->lock is released, so it is the caller's
> > > responsibility to either detect or prevent the use of such pages.
> > >
> > > One use example of this patch is to accelerate live migration by skipping
> > > the transfer of free pages reported from the guest. A popular method used
> > > by the hypervisor to track which part of memory is written during live
> > > migration is to write-protect all the guest memory. So, those pages that
> > > are reported as free pages but are written after the report function
> > > returns will be captured by the hypervisor, and they will be added to the
> > > next round of memory transfer.
> > >
> > > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > > Cc: Michal Hocko <mhocko@kernel.org>
> > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > Acked-by: Michal Hocko <mhocko@kernel.org>
> > > ---
> > > include/linux/mm.h | 6 ++++
> > > mm/page_alloc.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > 2 files changed, 97 insertions(+)
> > >
> > > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > > index ea818ff..b3077dd 100644
> > > --- a/include/linux/mm.h
> > > +++ b/include/linux/mm.h
> > > @@ -1938,6 +1938,12 @@ extern void free_area_init_node(int nid, unsigned long * zones_size,
> > > unsigned long zone_start_pfn, unsigned long *zholes_size);
> > > extern void free_initmem(void);
> > > +extern void walk_free_mem_block(void *opaque,
> > > + int min_order,
> > > + bool (*report_pfn_range)(void *opaque,
> > > + unsigned long pfn,
> > > + unsigned long num));
> > > +
> > > /*
> > > * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)
> > > * into the buddy system. The freed pages will be poisoned with pattern
> > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > > index 76c9688..705de22 100644
> > > --- a/mm/page_alloc.c
> > > +++ b/mm/page_alloc.c
> > > @@ -4899,6 +4899,97 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
> > > show_swap_cache_info();
> > > }
> > > +/*
> > > + * Walk through a free page list and report the found pfn range via the
> > > + * callback.
> > > + *
> > > + * Return false if the callback requests to stop reporting. Otherwise,
> > > + * return true.
> > > + */
> > > +static bool walk_free_page_list(void *opaque,
> > > + struct zone *zone,
> > > + int order,
> > > + enum migratetype mt,
> > > + bool (*report_pfn_range)(void *,
> > > + unsigned long,
> > > + unsigned long))
> > > +{
> > > + struct page *page;
> > > + struct list_head *list;
> > > + unsigned long pfn, flags;
> > > + bool ret;
> > > +
> > > + spin_lock_irqsave(&zone->lock, flags);
> > > + list = &zone->free_area[order].free_list[mt];
> > > + list_for_each_entry(page, list, lru) {
> > > + pfn = page_to_pfn(page);
> > > + ret = report_pfn_range(opaque, pfn, 1 << order);
> > > + if (!ret)
> > > + break;
> > > + }
> > > + spin_unlock_irqrestore(&zone->lock, flags);
> > > +
> > > + return ret;
> > > +}
> > There are two issues with this API. One is that it is not
> > restarteable: if you return false, you start from the
> > beginning. So no way to drop lock, do something slow
> > and then proceed.
> >
> > Another is that you are using it to report free page hints. Presumably
> > the point is to drop these pages - keeping them near head of the list
> > and reusing the reported ones will just make everything slower
> > invalidating the hint.
> >
> > How about rotating these pages towards the end of the list?
> > Probably not on each call, callect reported pages and then
> > move them to tail when we exit.
>
>
> I'm not sure how this would help. For example, we have a list of 2M free
> page blocks:
> A-->B-->C-->D-->E-->F-->G--H
>
> After reporting A and B, and put them to the end and exit, when the caller
> comes back,
> 1) if the list remains unchanged, then it will be
> C-->D-->E-->F-->G-->H-->A-->B
Right. So here we can just scan until we see A, right? It's a harder
question what to do if A and only A has been consumed. We don't want B
to be sent twice ideally. OTOH maybe that isn't a big deal if it's only
twice. Host might know page is already gone - how about host gives us a
hint after using the buffer?
> 2) If worse, all the blocks have been split into smaller blocks and used
> after the caller comes back.
>
> where could we continue?
I'm not sure. But an alternative appears to be to hold a lock
and just block whoever wanted to use any pages. Yes we are sending
hints faster but apparently something wanted these pages, and holding
the lock is interfering with this something.
>
> The reason to think about "restart" is the worry about the virtqueue is
> full, right? But we've agreed that losing some hints to report isn't
> important, and in practice, the virtqueue won't be full as the host side is
> faster.
It would be more convincing if we sent e.g. higher order pages
first. As it is - it won't take long to stuff ring full of
4K pages and it seems highly unlikely that host won't ever
be scheduled out.
Can we maybe agree on what kind of benchmark makes sense for
this work? I'm concerned that we are laser focused on just
how long does it take to migrate ignoring e.g. slowdowns
after migration.
> I'm concerned that actions on the free list may cause more controversy
> though it might be safe to do from some aspect, and would be hard to end
> debating. If possible, we could go with the most prudent approach for now,
> and have more discussions in future improvement patches. What would you
> think?
Well I'm not 100% about restartability. But keeping pages
freed by host near head of the list looks kind of wrong.
Try to float a patch on top for the rotation and see what happens?
>
>
> >
> >
> > > +
> > > +/**
> > > + * walk_free_mem_block - Walk through the free page blocks in the system
> > > + * @opaque: the context passed from the caller
> > > + * @min_order: the minimum order of free lists to check
> > > + * @report_pfn_range: the callback to report the pfn range of the free pages
> > > + *
> > > + * If the callback returns false, stop iterating the list of free page blocks.
> > > + * Otherwise, continue to report.
> > > + *
> > > + * Please note that there are no locking guarantees for the callback and
> > > + * that the reported pfn range might be freed or disappear after the
> > > + * callback returns so the caller has to be very careful how it is used.
> > > + *
> > > + * The callback itself must not sleep or perform any operations which would
> > > + * require any memory allocations directly (not even GFP_NOWAIT/GFP_ATOMIC)
> > > + * or via any lock dependency. It is generally advisable to implement
> > > + * the callback as simple as possible and defer any heavy lifting to a
> > > + * different context.
> > > + *
> > > + * There is no guarantee that each free range will be reported only once
> > > + * during one walk_free_mem_block invocation.
> > > + *
> > > + * pfn_to_page on the given range is strongly discouraged and if there is
> > > + * an absolute need for that make sure to contact MM people to discuss
> > > + * potential problems.
> > > + *
> > > + * The function itself might sleep so it cannot be called from atomic
> > > + * contexts.
> > > + *
> > > + * In general low orders tend to be very volatile and so it makes more
> > > + * sense to query larger ones first for various optimizations which like
> > > + * ballooning etc... This will reduce the overhead as well.
> > > + */
> > > +void walk_free_mem_block(void *opaque,
> > > + int min_order,
> > > + bool (*report_pfn_range)(void *opaque,
> > > + unsigned long pfn,
> > > + unsigned long num))
> > > +{
> > > + struct zone *zone;
> > > + int order;
> > > + enum migratetype mt;
> > > + bool ret;
> > > +
> > > + for_each_populated_zone(zone) {
> > > + for (order = MAX_ORDER - 1; order >= min_order; order--) {
> > > + for (mt = 0; mt < MIGRATE_TYPES; mt++) {
> > > + ret = walk_free_page_list(opaque, zone,
> > > + order, mt,
> > > + report_pfn_range);
> > > + if (!ret)
> > > + return;
> > > + }
> > > + }
> > > + }
> > > +}
> > > +EXPORT_SYMBOL_GPL(walk_free_mem_block);
> > > +
> > I think callers need a way to
> > 1. distinguish between completion and exit on error
>
> The first one here has actually been achieved by v25, where
> walk_free_mem_block returns 0 on completing the reporting, or a non-zero
> value which is returned from the callback.
> So the caller will detect errors via letting the callback to return
> something.
>
> Best,
> Wei
>
>
^ permalink raw reply
* [PATCH v3 2/2] drm/virtio: Handle buffers from the compositor
From: Tomeu Vizoso @ 2018-01-26 13:58 UTC (permalink / raw)
To: linux-kernel
Cc: Tomeu Vizoso, David Airlie, dri-devel, virtualization,
Zach Reizner, kernel
In-Reply-To: <20180126135803.29781-1-tomeu.vizoso@collabora.com>
When retrieving queued messages from the compositor in the host for
clients in the guest, handle buffers that may be passed.
These buffers should have been mapped to the guest's address space, for
example via the KVM_SET_USER_MEMORY_REGION ioctl.
Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
---
drivers/gpu/drm/virtio/virtgpu_ioctl.c | 54 ++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index d4230b1fa91d..57b1ad51d251 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -545,14 +545,58 @@ static unsigned int winsrv_poll(struct file *filp,
return mask;
}
+struct virtio_gpu_winsrv_region {
+ uint64_t pfn;
+ size_t size;
+};
+
+static int winsrv_fd_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct virtio_gpu_winsrv_region *region = filp->private_data;
+ unsigned long vm_size = vma->vm_end - vma->vm_start;
+ int ret = 0;
+
+ if (vm_size +
+ (vma->vm_pgoff << PAGE_SHIFT) > PAGE_ALIGN(region->size))
+ return -EINVAL;
+
+ ret = io_remap_pfn_range(vma, vma->vm_start, region->pfn, vm_size,
+ vma->vm_page_prot);
+ if (ret)
+ return ret;
+
+ vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+
+ return ret;
+}
+
+static int winsrv_fd_release(struct inode *inodep, struct file *filp)
+{
+ struct virtio_gpu_winsrv_region *region = filp->private_data;
+
+ kfree(region);
+
+ return 0;
+}
+
+static const struct file_operations winsrv_fd_fops = {
+ .mmap = winsrv_fd_mmap,
+ .release = winsrv_fd_release,
+};
+
static int winsrv_ioctl_rx(struct virtio_gpu_device *vgdev,
struct virtio_gpu_winsrv_conn *conn,
struct drm_virtgpu_winsrv *cmd)
{
struct virtio_gpu_winsrv_rx_qentry *qentry, *tmp;
struct virtio_gpu_winsrv_rx *virtio_cmd;
+ struct virtio_gpu_winsrv_region *region;
int available_len = cmd->len;
int read_count = 0;
+ int i;
+
+ for (i = 0; i < VIRTGPU_WINSRV_MAX_ALLOCS; i++)
+ cmd->fds[i] = -1;
list_for_each_entry_safe(qentry, tmp, &conn->cmdq, next) {
virtio_cmd = qentry->cmd;
@@ -567,6 +611,16 @@ static int winsrv_ioctl_rx(struct virtio_gpu_device *vgdev,
return -EFAULT;
}
+ for (i = 0; virtio_cmd->pfns[i]; i++) {
+ region = kmalloc(sizeof(*region), GFP_KERNEL);
+ region->pfn = virtio_cmd->pfns[i];
+ region->size = virtio_cmd->lens[i];
+ cmd->fds[i] = anon_inode_getfd("[winsrv_fd]",
+ &winsrv_fd_fops,
+ region,
+ O_CLOEXEC | O_RDWR);
+ }
+
available_len -= virtio_cmd->len;
read_count += virtio_cmd->len;
--
2.14.3
^ permalink raw reply related
* [PATCH v3 1/2] drm/virtio: Add window server support
From: Tomeu Vizoso @ 2018-01-26 13:58 UTC (permalink / raw)
To: linux-kernel
Cc: Tomeu Vizoso, Michael S. Tsirkin, David Airlie, dri-devel,
virtualization, Zach Reizner, kernel
In-Reply-To: <20180126135803.29781-1-tomeu.vizoso@collabora.com>
This is to allow clients running within VMs to be able to communicate
with a compositor in the host. Clients will use the communication
protocol that the compositor supports, and virtio-gpu will assist with
making buffers available in both sides, and copying content as needed.
It is expected that a service in the guest will act as a proxy,
interacting with virtio-gpu to support unmodified clients. For some
features of the protocol, the hypervisor might have to intervene and
also parse the protocol data to properly bridge resources. The following
IOCTLs have been added to this effect:
*_WINSRV_CONNECT: Opens a connection to the compositor in the host,
returns a FD that represents this connection and on which the following
IOCTLs can be used. Callers are expected to poll this FD for new
messages from the compositor.
*_WINSRV_TX: Asks the hypervisor to forward a message to the compositor
*_WINSRV_RX: Returns all queued messages
Alongside protocol data that is opaque to the kernel, the client can
send file descriptors that reference GEM buffers allocated by
virtio-gpu. The guest proxy is expected to figure out when a client is
passing a FD that refers to shared memory in the guest and allocate a
virtio-gpu buffer of the same size with DRM_VIRTGPU_RESOURCE_CREATE.
When the client notifies the compositor that it can read from that buffer,
the proxy should copy the contents from the SHM region to the virtio-gpu
resource and call DRM_VIRTGPU_TRANSFER_TO_HOST.
This has been tested with Wayland clients that make use of wl_shm to
pass buffers to the compositor, but is expected to work similarly for X
clients that make use of MIT-SHM with FD passing.
v2: * Add padding to two virtio command structs
* Properly cast two __user pointers (kbuild test robot)
v3: * Handle absence of winsrv support in QEMU (Dave Airlie)
Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
---
drivers/gpu/drm/virtio/virtgpu_drv.c | 1 +
drivers/gpu/drm/virtio/virtgpu_drv.h | 39 ++++-
drivers/gpu/drm/virtio/virtgpu_ioctl.c | 165 +++++++++++++++++++
drivers/gpu/drm/virtio/virtgpu_kms.c | 66 ++++++--
drivers/gpu/drm/virtio/virtgpu_vq.c | 285 ++++++++++++++++++++++++++++++++-
include/uapi/drm/virtgpu_drm.h | 29 ++++
include/uapi/linux/virtio_gpu.h | 43 +++++
7 files changed, 613 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 49a3d8d5a249..a528ddedd09f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -79,6 +79,7 @@ static unsigned int features[] = {
*/
VIRTIO_GPU_F_VIRGL,
#endif
+ VIRTIO_GPU_F_WINSRV,
};
static struct virtio_driver virtio_gpu_driver = {
.feature_table = features,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index da2fb585fea4..268b386e1232 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -178,6 +178,8 @@ struct virtio_gpu_device {
struct virtio_gpu_queue ctrlq;
struct virtio_gpu_queue cursorq;
+ struct virtio_gpu_queue winsrv_rxq;
+ struct virtio_gpu_queue winsrv_txq;
struct kmem_cache *vbufs;
bool vqs_ready;
@@ -205,10 +207,32 @@ struct virtio_gpu_device {
struct virtio_gpu_fpriv {
uint32_t ctx_id;
+
+ struct list_head winsrv_conns; /* list of virtio_gpu_winsrv_conn */
+ spinlock_t winsrv_lock;
+};
+
+struct virtio_gpu_winsrv_rx_qentry {
+ struct virtio_gpu_winsrv_rx *cmd;
+ struct list_head next;
+};
+
+struct virtio_gpu_winsrv_conn {
+ struct virtio_gpu_device *vgdev;
+
+ spinlock_t lock;
+
+ int fd;
+ struct drm_file *drm_file;
+
+ struct list_head cmdq; /* queue of virtio_gpu_winsrv_rx_qentry */
+ wait_queue_head_t cmdwq;
+
+ struct list_head next;
};
/* virtio_ioctl.c */
-#define DRM_VIRTIO_NUM_IOCTLS 10
+#define DRM_VIRTIO_NUM_IOCTLS 11
extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS];
/* virtio_kms.c */
@@ -318,9 +342,22 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev,
void virtio_gpu_ctrl_ack(struct virtqueue *vq);
void virtio_gpu_cursor_ack(struct virtqueue *vq);
void virtio_gpu_fence_ack(struct virtqueue *vq);
+void virtio_gpu_winsrv_tx_ack(struct virtqueue *vq);
+void virtio_gpu_winsrv_rx_read(struct virtqueue *vq);
void virtio_gpu_dequeue_ctrl_func(struct work_struct *work);
void virtio_gpu_dequeue_cursor_func(struct work_struct *work);
+void virtio_gpu_dequeue_winsrv_rx_func(struct work_struct *work);
+void virtio_gpu_dequeue_winsrv_tx_func(struct work_struct *work);
void virtio_gpu_dequeue_fence_func(struct work_struct *work);
+void virtio_gpu_fill_winsrv_rx(struct virtio_gpu_device *vgdev);
+void virtio_gpu_queue_winsrv_rx_in(struct virtio_gpu_device *vgdev,
+ struct virtio_gpu_winsrv_rx *cmd);
+int virtio_gpu_cmd_winsrv_connect(struct virtio_gpu_device *vgdev, int fd);
+void virtio_gpu_cmd_winsrv_disconnect(struct virtio_gpu_device *vgdev, int fd);
+int virtio_gpu_cmd_winsrv_tx(struct virtio_gpu_device *vgdev,
+ const char __user *buffer, u32 len,
+ int *fds, struct virtio_gpu_winsrv_conn *conn,
+ bool nonblock);
/* virtio_gpu_display.c */
int virtio_gpu_framebuffer_init(struct drm_device *dev,
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 0528edb4a2bf..d4230b1fa91d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -25,6 +25,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+
#include <drm/drmP.h>
#include <drm/virtgpu_drm.h>
#include <drm/ttm/ttm_execbuf_util.h>
@@ -527,6 +530,165 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
return 0;
}
+static unsigned int winsrv_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct virtio_gpu_winsrv_conn *conn = filp->private_data;
+ unsigned int mask = 0;
+
+ spin_lock(&conn->lock);
+ poll_wait(filp, &conn->cmdwq, wait);
+ if (!list_empty(&conn->cmdq))
+ mask |= POLLIN | POLLRDNORM;
+ spin_unlock(&conn->lock);
+
+ return mask;
+}
+
+static int winsrv_ioctl_rx(struct virtio_gpu_device *vgdev,
+ struct virtio_gpu_winsrv_conn *conn,
+ struct drm_virtgpu_winsrv *cmd)
+{
+ struct virtio_gpu_winsrv_rx_qentry *qentry, *tmp;
+ struct virtio_gpu_winsrv_rx *virtio_cmd;
+ int available_len = cmd->len;
+ int read_count = 0;
+
+ list_for_each_entry_safe(qentry, tmp, &conn->cmdq, next) {
+ virtio_cmd = qentry->cmd;
+ if (virtio_cmd->len > available_len)
+ return 0;
+
+ if (copy_to_user((void __user *)cmd->data + read_count,
+ virtio_cmd->data,
+ virtio_cmd->len)) {
+ /* return error unless we have some data to return */
+ if (read_count == 0)
+ return -EFAULT;
+ }
+
+ available_len -= virtio_cmd->len;
+ read_count += virtio_cmd->len;
+
+ virtio_gpu_queue_winsrv_rx_in(vgdev, virtio_cmd);
+
+ list_del(&qentry->next);
+ kfree(qentry);
+ }
+
+ cmd->len = read_count;
+
+ return 0;
+}
+
+static long winsrv_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct virtio_gpu_winsrv_conn *conn = filp->private_data;
+ struct virtio_gpu_device *vgdev = conn->vgdev;
+ struct drm_virtgpu_winsrv winsrv_cmd;
+ int ret;
+
+ if (_IOC_SIZE(cmd) > sizeof(winsrv_cmd))
+ return -EINVAL;
+
+ if (copy_from_user(&winsrv_cmd, (void __user *)arg,
+ _IOC_SIZE(cmd)) != 0)
+ return -EFAULT;
+
+ switch (cmd) {
+ case DRM_IOCTL_VIRTGPU_WINSRV_RX:
+ ret = winsrv_ioctl_rx(vgdev, conn, &winsrv_cmd);
+ if (copy_to_user((void __user *)arg, &winsrv_cmd,
+ _IOC_SIZE(cmd)) != 0)
+ return -EFAULT;
+
+ break;
+
+ case DRM_IOCTL_VIRTGPU_WINSRV_TX:
+ ret = virtio_gpu_cmd_winsrv_tx(vgdev,
+ u64_to_user_ptr(winsrv_cmd.data),
+ winsrv_cmd.len,
+ winsrv_cmd.fds,
+ conn,
+ filp->f_flags & O_NONBLOCK);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int winsrv_release(struct inode *inodep, struct file *filp)
+{
+ struct virtio_gpu_winsrv_conn *conn = filp->private_data;
+ struct virtio_gpu_device *vgdev = conn->vgdev;
+
+ virtio_gpu_cmd_winsrv_disconnect(vgdev, conn->fd);
+
+ list_del(&conn->next);
+ kfree(conn);
+
+ return 0;
+}
+
+static const struct file_operations winsrv_fops = {
+ .poll = winsrv_poll,
+ .unlocked_ioctl = winsrv_ioctl,
+ .release = winsrv_release,
+};
+
+static int virtio_gpu_winsrv_connect(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct virtio_gpu_device *vgdev = dev->dev_private;
+ struct virtio_gpu_fpriv *vfpriv = file->driver_priv;
+ struct drm_virtgpu_winsrv_connect *args = data;
+ struct virtio_gpu_winsrv_conn *conn;
+ int ret;
+
+ if (!virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_WINSRV))
+ return -ENODEV;
+
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+ if (!conn)
+ return -ENOMEM;
+
+ conn->vgdev = vgdev;
+ conn->drm_file = file;
+ spin_lock_init(&conn->lock);
+ INIT_LIST_HEAD(&conn->cmdq);
+ init_waitqueue_head(&conn->cmdwq);
+
+ ret = anon_inode_getfd("[virtgpu_winsrv]", &winsrv_fops, conn,
+ O_CLOEXEC | O_RDWR);
+ if (ret < 0)
+ goto free_conn;
+
+ conn->fd = ret;
+
+ ret = virtio_gpu_cmd_winsrv_connect(vgdev, conn->fd);
+ if (ret < 0)
+ goto close_fd;
+
+ spin_lock(&vfpriv->winsrv_lock);
+ list_add_tail(&conn->next, &vfpriv->winsrv_conns);
+ spin_unlock(&vfpriv->winsrv_lock);
+
+ args->fd = conn->fd;
+
+ return 0;
+
+close_fd:
+ sys_close(conn->fd);
+
+free_conn:
+ kfree(conn);
+
+ return ret;
+}
+
struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = {
DRM_IOCTL_DEF_DRV(VIRTGPU_MAP, virtio_gpu_map_ioctl,
DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
@@ -558,4 +720,7 @@ struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = {
DRM_IOCTL_DEF_DRV(VIRTGPU_GET_CAPS, virtio_gpu_get_caps_ioctl,
DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+
+ DRM_IOCTL_DEF_DRV(VIRTGPU_WINSRV_CONNECT, virtio_gpu_winsrv_connect,
+ DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
};
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 6400506a06b0..87b118d4b13c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -128,13 +128,16 @@ static void virtio_gpu_get_capsets(struct virtio_gpu_device *vgdev,
int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
{
static vq_callback_t *callbacks[] = {
- virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack
+ virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack,
+ virtio_gpu_winsrv_rx_read, virtio_gpu_winsrv_tx_ack
};
- static const char * const names[] = { "control", "cursor" };
+ static const char * const names[] = { "control", "cursor",
+ "winsrv-rx", "winsrv-tx" };
struct virtio_gpu_device *vgdev;
/* this will expand later */
- struct virtqueue *vqs[2];
+ struct virtqueue *vqs[4];
+ int nr_queues = 2;
u32 num_scanouts, num_capsets;
int ret;
@@ -158,6 +161,10 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
init_waitqueue_head(&vgdev->resp_wq);
virtio_gpu_init_vq(&vgdev->ctrlq, virtio_gpu_dequeue_ctrl_func);
virtio_gpu_init_vq(&vgdev->cursorq, virtio_gpu_dequeue_cursor_func);
+ virtio_gpu_init_vq(&vgdev->winsrv_rxq,
+ virtio_gpu_dequeue_winsrv_rx_func);
+ virtio_gpu_init_vq(&vgdev->winsrv_txq,
+ virtio_gpu_dequeue_winsrv_tx_func);
vgdev->fence_drv.context = dma_fence_context_alloc(1);
spin_lock_init(&vgdev->fence_drv.lock);
@@ -175,13 +182,21 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
DRM_INFO("virgl 3d acceleration not supported by guest\n");
#endif
- ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL);
+ if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_WINSRV))
+ nr_queues += 2;
+
+ ret = virtio_find_vqs(vgdev->vdev, nr_queues, vqs, callbacks, names,
+ NULL);
if (ret) {
DRM_ERROR("failed to find virt queues\n");
goto err_vqs;
}
vgdev->ctrlq.vq = vqs[0];
vgdev->cursorq.vq = vqs[1];
+ if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_WINSRV)) {
+ vgdev->winsrv_rxq.vq = vqs[2];
+ vgdev->winsrv_txq.vq = vqs[3];
+ }
ret = virtio_gpu_alloc_vbufs(vgdev);
if (ret) {
DRM_ERROR("failed to alloc vbufs\n");
@@ -215,6 +230,10 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
goto err_modeset;
virtio_device_ready(vgdev->vdev);
+
+ if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_WINSRV))
+ virtio_gpu_fill_winsrv_rx(vgdev);
+
vgdev->vqs_ready = true;
if (num_capsets)
@@ -256,6 +275,8 @@ void virtio_gpu_driver_unload(struct drm_device *dev)
vgdev->vqs_ready = false;
flush_work(&vgdev->ctrlq.dequeue_work);
flush_work(&vgdev->cursorq.dequeue_work);
+ flush_work(&vgdev->winsrv_rxq.dequeue_work);
+ flush_work(&vgdev->winsrv_txq.dequeue_work);
flush_work(&vgdev->config_changed_work);
vgdev->vdev->config->del_vqs(vgdev->vdev);
@@ -274,25 +295,43 @@ int virtio_gpu_driver_open(struct drm_device *dev, struct drm_file *file)
uint32_t id;
char dbgname[64], tmpname[TASK_COMM_LEN];
- /* can't create contexts without 3d renderer */
- if (!vgdev->has_virgl_3d)
- return 0;
-
- get_task_comm(tmpname, current);
- snprintf(dbgname, sizeof(dbgname), "%s", tmpname);
- dbgname[63] = 0;
/* allocate a virt GPU context for this opener */
vfpriv = kzalloc(sizeof(*vfpriv), GFP_KERNEL);
if (!vfpriv)
return -ENOMEM;
- virtio_gpu_context_create(vgdev, strlen(dbgname), dbgname, &id);
+ /* can't create contexts without 3d renderer */
+ if (vgdev->has_virgl_3d) {
+ get_task_comm(tmpname, current);
+ snprintf(dbgname, sizeof(dbgname), "%s", tmpname);
+ dbgname[63] = 0;
+
+ virtio_gpu_context_create(vgdev, strlen(dbgname), dbgname, &id);
+
+ vfpriv->ctx_id = id;
+ }
+
+ spin_lock_init(&vfpriv->winsrv_lock);
+ INIT_LIST_HEAD(&vfpriv->winsrv_conns);
- vfpriv->ctx_id = id;
file->driver_priv = vfpriv;
+
return 0;
}
+static void virtio_gpu_cleanup_conns(struct virtio_gpu_fpriv *vfpriv)
+{
+ struct virtio_gpu_winsrv_conn *conn, *tmp;
+ struct virtio_gpu_winsrv_rx_qentry *qentry, *tmp2;
+
+ list_for_each_entry_safe(conn, tmp, &vfpriv->winsrv_conns, next) {
+ list_for_each_entry_safe(qentry, tmp2, &conn->cmdq, next) {
+ kfree(qentry);
+ }
+ kfree(conn);
+ }
+}
+
void virtio_gpu_driver_postclose(struct drm_device *dev, struct drm_file *file)
{
struct virtio_gpu_device *vgdev = dev->dev_private;
@@ -303,6 +342,7 @@ void virtio_gpu_driver_postclose(struct drm_device *dev, struct drm_file *file)
vfpriv = file->driver_priv;
+ virtio_gpu_cleanup_conns(vfpriv);
virtio_gpu_context_destroy(vgdev, vfpriv->ctx_id);
kfree(vfpriv);
file->driver_priv = NULL;
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 9eb96fb2c147..ea5f9352d364 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -32,7 +32,7 @@
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
-#define MAX_INLINE_CMD_SIZE 96
+#define MAX_INLINE_CMD_SIZE 144
#define MAX_INLINE_RESP_SIZE 24
#define VBUFFER_SIZE (sizeof(struct virtio_gpu_vbuffer) \
+ MAX_INLINE_CMD_SIZE \
@@ -72,6 +72,67 @@ void virtio_gpu_cursor_ack(struct virtqueue *vq)
schedule_work(&vgdev->cursorq.dequeue_work);
}
+void virtio_gpu_winsrv_rx_read(struct virtqueue *vq)
+{
+ struct drm_device *dev = vq->vdev->priv;
+ struct virtio_gpu_device *vgdev = dev->dev_private;
+
+ schedule_work(&vgdev->winsrv_rxq.dequeue_work);
+}
+
+void virtio_gpu_winsrv_tx_ack(struct virtqueue *vq)
+{
+ struct drm_device *dev = vq->vdev->priv;
+ struct virtio_gpu_device *vgdev = dev->dev_private;
+
+ schedule_work(&vgdev->winsrv_txq.dequeue_work);
+}
+
+void virtio_gpu_queue_winsrv_rx_in(struct virtio_gpu_device *vgdev,
+ struct virtio_gpu_winsrv_rx *cmd)
+{
+ struct virtqueue *vq = vgdev->winsrv_rxq.vq;
+ struct scatterlist sg[1];
+ int ret;
+
+ sg_init_one(sg, cmd, sizeof(*cmd));
+
+ spin_lock(&vgdev->winsrv_rxq.qlock);
+retry:
+ ret = virtqueue_add_inbuf(vq, sg, 1, cmd, GFP_KERNEL);
+ if (ret == -ENOSPC) {
+ spin_unlock(&vgdev->winsrv_rxq.qlock);
+ wait_event(vgdev->winsrv_rxq.ack_queue, vq->num_free);
+ spin_lock(&vgdev->winsrv_rxq.qlock);
+ goto retry;
+ }
+ virtqueue_kick(vq);
+ spin_unlock(&vgdev->winsrv_rxq.qlock);
+}
+
+void virtio_gpu_fill_winsrv_rx(struct virtio_gpu_device *vgdev)
+{
+ struct virtqueue *vq = vgdev->winsrv_rxq.vq;
+ struct virtio_gpu_winsrv_rx *cmd;
+ int ret = 0;
+
+ while (vq->num_free > 0) {
+ cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+ if (!cmd) {
+ ret = -ENOMEM;
+ goto clear_queue;
+ }
+
+ virtio_gpu_queue_winsrv_rx_in(vgdev, cmd);
+ }
+
+ return;
+
+clear_queue:
+ while ((cmd = virtqueue_detach_unused_buf(vq)))
+ kfree(cmd);
+}
+
int virtio_gpu_alloc_vbufs(struct virtio_gpu_device *vgdev)
{
vgdev->vbufs = kmem_cache_create("virtio-gpu-vbufs",
@@ -258,6 +319,96 @@ void virtio_gpu_dequeue_cursor_func(struct work_struct *work)
wake_up(&vgdev->cursorq.ack_queue);
}
+void virtio_gpu_dequeue_winsrv_tx_func(struct work_struct *work)
+{
+ struct virtio_gpu_device *vgdev =
+ container_of(work, struct virtio_gpu_device,
+ winsrv_txq.dequeue_work);
+ struct virtio_gpu_vbuffer *vbuf;
+ int len;
+
+ spin_lock(&vgdev->winsrv_txq.qlock);
+ do {
+ while ((vbuf = virtqueue_get_buf(vgdev->winsrv_txq.vq, &len)))
+ free_vbuf(vgdev, vbuf);
+ } while (!virtqueue_enable_cb(vgdev->winsrv_txq.vq));
+ spin_unlock(&vgdev->winsrv_txq.qlock);
+
+ wake_up(&vgdev->winsrv_txq.ack_queue);
+}
+
+static struct virtio_gpu_winsrv_conn *find_conn(struct virtio_gpu_device *vgdev,
+ int fd)
+{
+ struct virtio_gpu_winsrv_conn *conn;
+ struct drm_device *ddev = vgdev->ddev;
+ struct drm_file *file;
+ struct virtio_gpu_fpriv *vfpriv;
+
+ mutex_lock(&ddev->filelist_mutex);
+ list_for_each_entry(file, &ddev->filelist, lhead) {
+ vfpriv = file->driver_priv;
+ spin_lock(&vfpriv->winsrv_lock);
+ list_for_each_entry(conn, &vfpriv->winsrv_conns, next) {
+ if (conn->fd == fd) {
+ spin_lock(&conn->lock);
+ spin_unlock(&vfpriv->winsrv_lock);
+ mutex_unlock(&ddev->filelist_mutex);
+ return conn;
+ }
+ }
+ spin_unlock(&vfpriv->winsrv_lock);
+ }
+ mutex_unlock(&ddev->filelist_mutex);
+
+ return NULL;
+}
+
+static void handle_rx_cmd(struct virtio_gpu_device *vgdev,
+ struct virtio_gpu_winsrv_rx *cmd)
+{
+ struct virtio_gpu_winsrv_conn *conn;
+ struct virtio_gpu_winsrv_rx_qentry *qentry;
+
+ conn = find_conn(vgdev, cmd->client_fd);
+ if (!conn) {
+ DRM_DEBUG("recv for unknown client fd %u\n", cmd->client_fd);
+ return;
+ }
+
+ qentry = kzalloc(sizeof(*qentry), GFP_KERNEL);
+ if (!qentry) {
+ spin_unlock(&conn->lock);
+ DRM_DEBUG("failed to allocate qentry for winsrv connection\n");
+ return;
+ }
+
+ qentry->cmd = cmd;
+
+ list_add_tail(&qentry->next, &conn->cmdq);
+ wake_up_interruptible(&conn->cmdwq);
+ spin_unlock(&conn->lock);
+}
+
+void virtio_gpu_dequeue_winsrv_rx_func(struct work_struct *work)
+{
+ struct virtio_gpu_device *vgdev =
+ container_of(work, struct virtio_gpu_device,
+ winsrv_rxq.dequeue_work);
+ struct virtio_gpu_winsrv_rx *cmd;
+ unsigned int len;
+
+ spin_lock(&vgdev->winsrv_rxq.qlock);
+ while ((cmd = virtqueue_get_buf(vgdev->winsrv_rxq.vq, &len)) != NULL) {
+ spin_unlock(&vgdev->winsrv_rxq.qlock);
+ handle_rx_cmd(vgdev, cmd);
+ spin_lock(&vgdev->winsrv_rxq.qlock);
+ }
+ spin_unlock(&vgdev->winsrv_rxq.qlock);
+
+ virtqueue_kick(vgdev->winsrv_rxq.vq);
+}
+
static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
struct virtio_gpu_vbuffer *vbuf)
__releases(&vgdev->ctrlq.qlock)
@@ -380,6 +531,41 @@ static int virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev,
return ret;
}
+static int virtio_gpu_queue_winsrv_tx(struct virtio_gpu_device *vgdev,
+ struct virtio_gpu_vbuffer *vbuf)
+{
+ struct virtqueue *vq = vgdev->winsrv_txq.vq;
+ struct scatterlist *sgs[2], vcmd, vout;
+ int ret;
+
+ if (!vgdev->vqs_ready)
+ return -ENODEV;
+
+ sg_init_one(&vcmd, vbuf->buf, vbuf->size);
+ sgs[0] = &vcmd;
+
+ sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
+ sgs[1] = &vout;
+
+ spin_lock(&vgdev->winsrv_txq.qlock);
+retry:
+ ret = virtqueue_add_sgs(vq, sgs, 2, 0, vbuf, GFP_ATOMIC);
+ if (ret == -ENOSPC) {
+ spin_unlock(&vgdev->winsrv_txq.qlock);
+ wait_event(vgdev->winsrv_txq.ack_queue, vq->num_free);
+ spin_lock(&vgdev->winsrv_txq.qlock);
+ goto retry;
+ }
+
+ virtqueue_kick(vq);
+
+ spin_unlock(&vgdev->winsrv_txq.qlock);
+
+ if (!ret)
+ ret = vq->num_free;
+ return ret;
+}
+
/* just create gem objects for userspace and long lived objects,
just use dma_alloced pages for the queue objects? */
@@ -890,3 +1076,100 @@ void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev,
memcpy(cur_p, &output->cursor, sizeof(output->cursor));
virtio_gpu_queue_cursor(vgdev, vbuf);
}
+
+int virtio_gpu_cmd_winsrv_connect(struct virtio_gpu_device *vgdev, int fd)
+{
+ struct virtio_gpu_winsrv_connect *cmd_p;
+ struct virtio_gpu_vbuffer *vbuf;
+
+ cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+ memset(cmd_p, 0, sizeof(*cmd_p));
+
+ cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_WINSRV_CONNECT);
+ cmd_p->client_fd = cpu_to_le32(fd);
+ return virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+}
+
+void virtio_gpu_cmd_winsrv_disconnect(struct virtio_gpu_device *vgdev, int fd)
+{
+ struct virtio_gpu_winsrv_disconnect *cmd_p;
+ struct virtio_gpu_vbuffer *vbuf;
+
+ cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+ memset(cmd_p, 0, sizeof(*cmd_p));
+
+ cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_WINSRV_DISCONNECT);
+ cmd_p->client_fd = cpu_to_le32(fd);
+ virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+}
+
+int virtio_gpu_cmd_winsrv_tx(struct virtio_gpu_device *vgdev,
+ const char __user *buffer, u32 len,
+ int *fds, struct virtio_gpu_winsrv_conn *conn,
+ bool nonblock)
+{
+ int client_fd = conn->fd;
+ struct drm_file *file = conn->drm_file;
+ struct virtio_gpu_winsrv_tx *cmd_p;
+ struct virtio_gpu_vbuffer *vbuf;
+ uint32_t gem_handle;
+ struct drm_gem_object *gobj = NULL;
+ struct virtio_gpu_object *qobj = NULL;
+ int ret, i, fd;
+
+ cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+ memset(cmd_p, 0, sizeof(*cmd_p));
+
+ cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_WINSRV_TX);
+
+ for (i = 0; i < VIRTIO_GPU_WINSRV_MAX_ALLOCS; i++) {
+ cmd_p->resource_ids[i] = -1;
+
+ fd = fds[i];
+ if (fd < 0)
+ break;
+
+ ret = drm_gem_prime_fd_to_handle(vgdev->ddev, file, fd,
+ &gem_handle);
+ if (ret != 0)
+ goto err_free_vbuf;
+
+ gobj = drm_gem_object_lookup(file, gem_handle);
+ if (gobj == NULL) {
+ ret = -ENOENT;
+ goto err_free_vbuf;
+ }
+
+ qobj = gem_to_virtio_gpu_obj(gobj);
+ cmd_p->resource_ids[i] = qobj->hw_res_handle;
+ }
+
+ cmd_p->client_fd = client_fd;
+ cmd_p->len = cpu_to_le32(len);
+
+ /* gets freed when the ring has consumed it */
+ vbuf->data_buf = kmalloc(cmd_p->len, GFP_KERNEL);
+ if (!vbuf->data_buf) {
+ DRM_ERROR("failed to allocate winsrv tx buffer\n");
+ ret = -ENOMEM;
+ goto err_free_vbuf;
+ }
+
+ vbuf->data_size = cmd_p->len;
+
+ if (copy_from_user(vbuf->data_buf, buffer, cmd_p->len)) {
+ ret = -EFAULT;
+ goto err_free_databuf;
+ }
+
+ virtio_gpu_queue_winsrv_tx(vgdev, vbuf);
+
+ return 0;
+
+err_free_databuf:
+ kfree(vbuf->data_buf);
+err_free_vbuf:
+ free_vbuf(vgdev, vbuf);
+
+ return ret;
+}
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index 91a31ffed828..89b0a1a707a7 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -46,6 +46,11 @@ extern "C" {
#define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07
#define DRM_VIRTGPU_WAIT 0x08
#define DRM_VIRTGPU_GET_CAPS 0x09
+#define DRM_VIRTGPU_WINSRV_CONNECT 0x0a
+#define DRM_VIRTGPU_WINSRV_TX 0x0b
+#define DRM_VIRTGPU_WINSRV_RX 0x0c
+
+#define VIRTGPU_WINSRV_MAX_ALLOCS 28
struct drm_virtgpu_map {
__u64 offset; /* use for mmap system call */
@@ -132,6 +137,18 @@ struct drm_virtgpu_get_caps {
__u32 pad;
};
+struct drm_virtgpu_winsrv {
+ __s32 fds[VIRTGPU_WINSRV_MAX_ALLOCS];
+ __u64 data;
+ __u32 len;
+ __u32 pad;
+};
+
+struct drm_virtgpu_winsrv_connect {
+ __u32 fd; /* returned by kernel */
+ __u32 pad;
+};
+
#define DRM_IOCTL_VIRTGPU_MAP \
DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map)
@@ -167,6 +184,18 @@ struct drm_virtgpu_get_caps {
DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \
struct drm_virtgpu_get_caps)
+#define DRM_IOCTL_VIRTGPU_WINSRV_CONNECT \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WINSRV_CONNECT, \
+ struct drm_virtgpu_winsrv_connect)
+
+#define DRM_IOCTL_VIRTGPU_WINSRV_TX \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WINSRV_TX, \
+ struct drm_virtgpu_winsrv)
+
+#define DRM_IOCTL_VIRTGPU_WINSRV_RX \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WINSRV_RX, \
+ struct drm_virtgpu_winsrv)
+
#if defined(__cplusplus)
}
#endif
diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 4b04ead26cd9..3567f84d03e9 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -41,6 +41,7 @@
#include <linux/types.h>
#define VIRTIO_GPU_F_VIRGL 0
+#define VIRTIO_GPU_F_WINSRV 1
enum virtio_gpu_ctrl_type {
VIRTIO_GPU_UNDEFINED = 0,
@@ -71,6 +72,12 @@ enum virtio_gpu_ctrl_type {
VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300,
VIRTIO_GPU_CMD_MOVE_CURSOR,
+ /* window server commands */
+ VIRTIO_GPU_CMD_WINSRV_CONNECT = 0x0400,
+ VIRTIO_GPU_CMD_WINSRV_DISCONNECT,
+ VIRTIO_GPU_CMD_WINSRV_TX,
+ VIRTIO_GPU_CMD_WINSRV_RX,
+
/* success responses */
VIRTIO_GPU_RESP_OK_NODATA = 0x1100,
VIRTIO_GPU_RESP_OK_DISPLAY_INFO,
@@ -290,6 +297,42 @@ struct virtio_gpu_resp_capset {
__u8 capset_data[];
};
+/* VIRTIO_GPU_CMD_WINSRV_CONNECT */
+struct virtio_gpu_winsrv_connect {
+ struct virtio_gpu_ctrl_hdr hdr;
+ __le32 client_fd;
+ __le32 padding;
+};
+
+/* VIRTIO_GPU_CMD_WINSRV_DISCONNECT */
+struct virtio_gpu_winsrv_disconnect {
+ struct virtio_gpu_ctrl_hdr hdr;
+ __le32 client_fd;
+ __le32 padding;
+};
+
+#define VIRTIO_GPU_WINSRV_MAX_ALLOCS 28
+#define VIRTIO_GPU_WINSRV_TX_MAX_DATA 4096
+
+/* VIRTIO_GPU_CMD_WINSRV_TX */
+/* these commands are followed in the queue descriptor by protocol buffers */
+struct virtio_gpu_winsrv_tx {
+ struct virtio_gpu_ctrl_hdr hdr;
+ __u32 client_fd;
+ __u32 len;
+ __le32 resource_ids[VIRTIO_GPU_WINSRV_MAX_ALLOCS];
+};
+
+/* VIRTIO_GPU_CMD_WINSRV_RX */
+struct virtio_gpu_winsrv_rx {
+ struct virtio_gpu_ctrl_hdr hdr;
+ __le32 client_fd;
+ __u8 data[VIRTIO_GPU_WINSRV_TX_MAX_DATA];
+ __u32 len;
+ __u64 pfns[VIRTIO_GPU_WINSRV_MAX_ALLOCS];
+ __u32 lens[VIRTIO_GPU_WINSRV_MAX_ALLOCS];
+};
+
#define VIRTIO_GPU_EVENT_DISPLAY (1 << 0)
struct virtio_gpu_config {
--
2.14.3
^ permalink raw reply related
* [PATCH v3 0/2] drm/virtio: Add window server support
From: Tomeu Vizoso @ 2018-01-26 13:58 UTC (permalink / raw)
To: linux-kernel
Cc: Tomeu Vizoso, Michael S. Tsirkin, David Airlie, dri-devel,
virtualization, Zach Reizner, kernel
Hi,
this work is based on the virtio_wl driver in the ChromeOS kernel by
Zach Reizner, currently at:
https://chromium.googlesource.com/chromiumos/third_party/kernel/+/chromeos-4.4/drivers/virtio/virtio_wl.c
There's one feature missing currently, which is letting clients write
directly to the host part of a resource, so the extra copy in
TRANSFER_TO_HOST isn't needed.
Have pushed the QEMU counterpart to this branch, though it isn't as
polished atm:
https://gitlab.collabora.com/tomeu/qemu/commits/winsrv-wip
Thanks,
Tomeu
Tomeu Vizoso (2):
drm/virtio: Add window server support
drm/virtio: Handle buffers from the compositor
drivers/gpu/drm/virtio/virtgpu_drv.c | 1 +
drivers/gpu/drm/virtio/virtgpu_drv.h | 39 ++++-
drivers/gpu/drm/virtio/virtgpu_ioctl.c | 219 +++++++++++++++++++++++++
drivers/gpu/drm/virtio/virtgpu_kms.c | 66 ++++++--
drivers/gpu/drm/virtio/virtgpu_vq.c | 285 ++++++++++++++++++++++++++++++++-
include/uapi/drm/virtgpu_drm.h | 29 ++++
include/uapi/linux/virtio_gpu.h | 43 +++++
7 files changed, 667 insertions(+), 15 deletions(-)
--
2.14.3
^ permalink raw reply
* Re: [PATCH net-next 12/12] tools/virtio: fix smp_mb on x86
From: Michael S. Tsirkin @ 2018-01-26 13:45 UTC (permalink / raw)
To: Jason Wang
Cc: netdev, virtualization, John Fastabend, linux-kernel,
David Miller
In-Reply-To: <f2581b9f-9e0d-4db5-ee9d-a235bb0a1842@redhat.com>
On Fri, Jan 26, 2018 at 11:56:14AM +0800, Jason Wang wrote:
>
>
> On 2018年01月26日 07:36, Michael S. Tsirkin wrote:
> > Offset 128 overlaps the last word of the redzone.
> > Use 132 which is always beyond that.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > tools/virtio/ringtest/main.h | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
> > index 593a328..301d59b 100644
> > --- a/tools/virtio/ringtest/main.h
> > +++ b/tools/virtio/ringtest/main.h
> > @@ -111,7 +111,7 @@ static inline void busy_wait(void)
> > }
> > #if defined(__x86_64__) || defined(__i386__)
> > -#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
>
> Just wonder did "rsp" work for __i386__ ?
>
> Thanks
Oh you are right of course. Probably no one ever run this one on i386 :)
I'll add a patch on top as this is not a new bug.
> > +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
> > #else
> > /*
> > * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [virtio-dev] Re: [PATCH v25 2/2] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
From: Tetsuo Handa @ 2018-01-26 13:35 UTC (permalink / raw)
To: Wei Wang, Michael S. Tsirkin
Cc: yang.zhang.wz, virtio-dev, riel, quan.xu0, kvm, nilal,
liliang.opensource, linux-kernel, mhocko, linux-mm, pbonzini,
akpm, virtualization
In-Reply-To: <5A6AA107.3000607@intel.com>
On 2018/01/26 12:31, Wei Wang wrote:
> On 01/26/2018 10:42 AM, Michael S. Tsirkin wrote:
>> On Fri, Jan 26, 2018 at 09:40:44AM +0800, Wei Wang wrote:
>>> On 01/25/2018 09:49 PM, Michael S. Tsirkin wrote:
>>>> On Thu, Jan 25, 2018 at 05:14:06PM +0800, Wei Wang wrote:
>>>>
>
>>> The controversy is that the free list is not static
>>> once the lock is dropped, so everything is dynamically changing, including
>>> the state that was recorded. The method we are using is more prudent, IMHO.
>>> How about taking the fundamental solution, and seek to improve incrementally
>>> in the future?
>>>
>>>
>>> Best,
>>> Wei
>> I'd like to see kicks happen outside the spinlock. kick with a spinlock
>> taken looks like a scalability issue that won't be easy to
>> reproduce but hurt workloads at random unexpected times.
>>
>
> Is that "kick inside the spinlock" the only concern you have? I think we can remove the kick actually. If we check how the host side works, it is worthwhile to let the host poll the virtqueue after it receives the cmd id from the guest (kick for cmd id isn't within the lock).
We should start from the worst case.
+ * The callback itself must not sleep or perform any operations which would
+ * require any memory allocations directly (not even GFP_NOWAIT/GFP_ATOMIC)
+ * or via any lock dependency. It is generally advisable to implement
+ * the callback as simple as possible and defer any heavy lifting to a
+ * different context.
Making decision based on performance numbers of idle guests is dangerous.
There might be busy CPUs waiting for zone->lock.
^ permalink raw reply
* Re: [RFC PATCH net-next v2 2/2] virtio_net: Extend virtio to use VF datapath when available
From: Siwei Liu @ 2018-01-26 8:14 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Alexander Duyck, virtio-dev, Jakub Kicinski, Sridhar Samudrala,
virtualization, Netdev, David Miller
In-Reply-To: <20180124004556-mutt-send-email-mst@kernel.org>
On Tue, Jan 23, 2018 at 2:58 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Tue, Jan 23, 2018 at 12:24:47PM -0800, Siwei Liu wrote:
>> On Mon, Jan 22, 2018 at 1:41 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> > On Mon, Jan 22, 2018 at 12:27:14PM -0800, Siwei Liu wrote:
>> >> First off, as mentioned in another thread, the model of stacking up
>> >> virt-bond functionality over virtio seems a wrong direction to me.
>> >> Essentially the migration process would need to carry over all guest
>> >> side configurations previously done on the VF/PT and get them moved to
>> >> the new device being it virtio or VF/PT.
>> >
>> > I might be wrong but I don't see why we should worry about this usecase.
>> > Whoever has a bond configured already has working config for migration.
>> > We are trying to help people who don't, not convert existig users.
>>
>> That has been placed in the view of cloud providers that the imported
>> images from the store must be able to run unmodified thus no
>> additional setup script is allowed (just as Stephen mentioned in
>> another mail). Cloud users don't care about live migration themselves
>> but the providers are required to implement such automation mechanism
>> to make this process transparent if at all possible. The user does not
>> care about the device underneath being VF or not, but they do care
>> about consistency all across and the resulting performance
>> acceleration in making VF the prefered datapath. It is not quite
>> peculiar user cases but IMHO *any* approach proposed for live
>> migration should be able to persist the state including network config
>> e.g. as simple as MTU. Actually this requirement has nothing to do
>> with virtio but our target users are live migration agnostic, being it
>> tracking DMA through dirty pages, using virtio as the helper, or
>> whatsoever, the goal of persisting configs across remains same.
>
> So the patching being discussed here will mostly do exactly that if your
> original config was simply a single virtio net device.
>
True, but I don't see the patch being discussed starts with good
foundation of supporting the same for VF/PT device. That is the core
of the issue.
>
> What kind of configs do your users have right now?
Any configs be it generic or driver specific that the VF/PT device
supports and have been enabled/configured. General network configs
(MAC, IP address, VLAN, MTU, iptables rules), ethtool settings
(hardware offload, # of queues and ring entris, RSC options, rss
rxfh-indir table, rx-flow-hash, et al) , bpf/XDP program being run, tc
flower offload, just to name a few. As cloud providers we don't limit
users from applying driver specific tuning to the NIC/VF, and
sometimes this is essential to achieving best performance for their
workload. We've seen cases like tuning coalescing parameters for
getting low latency, changing rx-flow-hash function for better VXLAN
throughput, or even adopting quite advanced NIC features such as flow
director or cloud filter. We don't expect users to compromise even a
little bit on these. That is once we turn on live migration for the VF
or pass through devices in the VM, it all takes place under the hood,
users (guest admins, applications) don't have to react upon it or even
notice the change. I should note that the majority of live migrations
take place between machines with completely identical hardware, it's
more critical than necessary to keep the config as-is across the move,
stealth while quiet.
As you see generic bond or bridge cannot suffice the need. That's why
we need a new customized virt bond driver, and tailor it for VM live
migration specifically. Leveraging para-virtual e.g. virtio net device
as the backup path is one thing, tracking through driver config
changes in order to re-config as necessary is another. I would think
without considering the latter, the proposal being discussed is rather
incomplete, and remote to be useful in production.
>
>
>> >
>> >> Without the help of a new
>> >> upper layer bond driver that enslaves virtio and VF/PT devices
>> >> underneath, virtio will be overloaded with too much specifics being a
>> >> VF/PT backup in the future.
>> >
>> > So this paragraph already includes at least two conflicting
>> > proposals. On the one hand you want a separate device for
>> > the virtual bond, on the other you are saying a separate
>> > driver.
>>
>> Just to be crystal clear: separate virtual bond device (netdev ops,
>> not necessarily bus device) for VM migration specifically with a
>> separate driver.
>
> Okay, but note that any config someone had on a virtio device won't
> propagate to that bond.
>
>> >
>> > Further, the reason to have a separate *driver* was that
>> > some people wanted to share code with netvsc - and that
>> > one does not create a separate device, which you can't
>> > change without breaking existing configs.
>>
>> I'm not sure I understand this statement. netvsc is already another
>> netdev being created than the enslaved VF netdev, why it bothers?
>
> Because it shipped, so userspace ABI is frozen. You can't really add a
> netdevice and enslave an existing one without a risk of breaking some
> userspace configs.
>
I still don't understand this concern. Like said, before this patch
becomes reality, users interact with raw VF interface all the time.
Now this patch introduces a virtio net devive and enslave the VF.
Users have to interact with two interfaces - IP address and friends
configured on the VF will get lost and users have to reconfigure
virtio all over again. But some other configs e.g. ethtool needs to
remain on the VF. How does it guarantee existing configs won't broken?
Appears to me this is nothing different than having both virtio and VF
netdevs enslaved and users operates on the virt-bond interface
directly.
One thing I'd like to point out is the configs are mostly done in the
control plane. It's entirely possible to separate the data and control
paths in the new virt-bond driver: in the data plane, it may bypass
the virt-bond layer and quickly fall through to the data path of
virtio or VF slave; while in the control plane, the virt-bond may
disguise itself as the active slave, delegate the config changes to
the real driver, relay and expose driver config/state to the user. By
doing that the users and userspace applications just interact with one
single interface, the same way they interacted with the VF interface
as before. Users don't have to deal with the other two enslaved
interfaces directly - those automatically enslaved devices should be
made invisible from userspace applications and admins, and/or be
masked out from regular access by existing kernel APIs.
I don't find it a good reason to reject the idea if we can sort out
ways not to break existing ABI or APIs.
>
>> In
>> the Azure case, the stock image to be imported does not bind to a
>> specific driver but only MAC address.
>
> I'll let netvsc developers decide this, on the surface I don't think
> it's reasonable to assume everyone only binds to a MAC.
Sure. The point I wanted to make was that cloud providers are super
elastic in provisioning images - those driver or device specifics
should have been dehydrated from the original images thus make it
flexible enough to deploy to machines with vast varieties of hardware.
Although it's not necessarily the case everyone binds to a MAC, it's
worth taking a look at what the target users are doing and what the
pain points really are and understand what could be done to solve core
problems. Hyper-V netvsc can also benefit once moved to it, I'd
believe.
>
>
>> And people just deal with the
>> new virt-bond netdev rather than the underlying virtio and VF. And
>> both these two underlying netdevs should be made invisible to prevent
>> userspace script from getting them misconfigured IMHO.
>>
>> A separate driver was for code sharing for sure, only just netvsc but
>> could be other para-virtual devices floating around: any PV can serve
>> as the side channel and the backup path for VF/PT. Once we get the new
>> driver working atop virtio we may define ops and/or protocol needed to
>> talk to various other PV frontend that may implement the side channel
>> of its own for datapath switching (e.g. virtio is one of them, Xen PV
>> frontend can be another). I just don't like to limit the function to
>> virtio only and we have to duplicate code then it starts to scatter
>> around all over the places.
>>
>> I understand right now we start it as simple so it may just be fine
>> that the initial development activities center around virtio. However,
>> from cloud provider/vendor perspective I don't see the proposed scheme
>> limits to virtio only. Any other PV driver which has the plan to
>> support the same scheme can benefit. The point is that we shouldn't be
>> limiting the scheme to virtio specifics so early which is hard to have
>> it promoted to a common driver once we get there.
>
> The whole idea has been floating around for years. It would always
> get being drowned in this kind of "lets try to cover all use-cases"
> discussions, and never make progress.
> So let's see some working code merged. If it works fine for virtio
> and turns out to be a good fit for netvsc, we can share code.
I think we at least should start with a separate netdev other than
virtio. That is what we may agree to have to do without comprise I'd
hope.
>
>
>> >
>> > So some people want a fully userspace-configurable switchdev, and that
>> > already exists at some level, and maybe it makes sense to add more
>> > features for performance.
>> >
>> > But the point was that some host configurations are very simple,
>> > and it probably makes sense to pass this information to the guest
>> > and have guest act on it directly. Let's not conflate the two.
>>
>> It may be fine to push some of the configurations from host but that
>> perhaps doesn't cover all the cases: how is it possible for the host
>> to save all network states and configs done by the guest before
>> migration. Some of the configs might come from future guest which is
>> unknown to host. Anyhow the bottom line is that the guest must be able
>> to act on those configuration request changes automatically without
>> involving users intervention.
>>
>> Regards,
>> -Siwei
>
> All use-cases are *already* covered by existing kernel APIs. Just use a
> bond, or a bridge, or whatever. It's just that they are so generic and
> hard to use, that userspace to do it never surfaced.
As mentioned earlier, for which I cannot stress enough, the existing
generic bond or bridge doesn't work. We need a new net device that
works for live migration specifically and fits it well.
>
> So I am interested in some code that handles some simple use-cases
> in the kernel, with a simple kernel API.
It should be fine, I like simple stuffs too and wouldn't like to make
complications. The concept of hiding auto-managed interfaces is not
new and has even been implemented by other operating systems already.
Not sure if that is your compatibility concern. We start with simple
for sure, but simple != in-expandable then make potential users
impossible to use at all.
Thanks,
-Siwei
>
>> >
>> > --
>> > MST
^ permalink raw reply
* Re: [PATCH net-next 12/12] tools/virtio: fix smp_mb on x86
From: Jason Wang @ 2018-01-26 3:56 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel
Cc: netdev, John Fastabend, David Miller, virtualization
In-Reply-To: <1516923320-16959-16-git-send-email-mst@redhat.com>
On 2018年01月26日 07:36, Michael S. Tsirkin wrote:
> Offset 128 overlaps the last word of the redzone.
> Use 132 which is always beyond that.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> tools/virtio/ringtest/main.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
> index 593a328..301d59b 100644
> --- a/tools/virtio/ringtest/main.h
> +++ b/tools/virtio/ringtest/main.h
> @@ -111,7 +111,7 @@ static inline void busy_wait(void)
> }
>
> #if defined(__x86_64__) || defined(__i386__)
> -#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
Just wonder did "rsp" work for __i386__ ?
Thanks
> +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
> #else
> /*
> * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [virtio-dev] Re: [PATCH v25 2/2] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
From: Wei Wang @ 2018-01-26 3:31 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: yang.zhang.wz, virtio-dev, riel, quan.xu0, kvm, nilal,
liliang.opensource, linux-kernel, mhocko, linux-mm, pbonzini,
akpm, virtualization
In-Reply-To: <20180126042649-mutt-send-email-mst@kernel.org>
On 01/26/2018 10:42 AM, Michael S. Tsirkin wrote:
> On Fri, Jan 26, 2018 at 09:40:44AM +0800, Wei Wang wrote:
>> On 01/25/2018 09:49 PM, Michael S. Tsirkin wrote:
>>> On Thu, Jan 25, 2018 at 05:14:06PM +0800, Wei Wang wrote:
>>>
>> The controversy is that the free list is not static
>> once the lock is dropped, so everything is dynamically changing, including
>> the state that was recorded. The method we are using is more prudent, IMHO.
>> How about taking the fundamental solution, and seek to improve incrementally
>> in the future?
>>
>>
>> Best,
>> Wei
> I'd like to see kicks happen outside the spinlock. kick with a spinlock
> taken looks like a scalability issue that won't be easy to
> reproduce but hurt workloads at random unexpected times.
>
Is that "kick inside the spinlock" the only concern you have? I think we
can remove the kick actually. If we check how the host side works, it is
worthwhile to let the host poll the virtqueue after it receives the cmd
id from the guest (kick for cmd id isn't within the lock).
Best,
Wei
^ permalink raw reply
* Re: [PATCH v24 1/2] mm: support reporting free page blocks
From: Wei Wang @ 2018-01-26 3:29 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: yang.zhang.wz, virtio-dev, riel, quan.xu0, kvm, nilal,
liliang.opensource, linux-kernel, mhocko, linux-mm, pbonzini,
akpm, virtualization
In-Reply-To: <20180125152933-mutt-send-email-mst@kernel.org>
On 01/25/2018 09:41 PM, Michael S. Tsirkin wrote:
> On Wed, Jan 24, 2018 at 06:42:41PM +0800, Wei Wang wrote:
>> This patch adds support to walk through the free page blocks in the
>> system and report them via a callback function. Some page blocks may
>> leave the free list after zone->lock is released, so it is the caller's
>> responsibility to either detect or prevent the use of such pages.
>>
>> One use example of this patch is to accelerate live migration by skipping
>> the transfer of free pages reported from the guest. A popular method used
>> by the hypervisor to track which part of memory is written during live
>> migration is to write-protect all the guest memory. So, those pages that
>> are reported as free pages but are written after the report function
>> returns will be captured by the hypervisor, and they will be added to the
>> next round of memory transfer.
>>
>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>> Signed-off-by: Liang Li <liang.z.li@intel.com>
>> Cc: Michal Hocko <mhocko@kernel.org>
>> Cc: Michael S. Tsirkin <mst@redhat.com>
>> Acked-by: Michal Hocko <mhocko@kernel.org>
>> ---
>> include/linux/mm.h | 6 ++++
>> mm/page_alloc.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 97 insertions(+)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index ea818ff..b3077dd 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1938,6 +1938,12 @@ extern void free_area_init_node(int nid, unsigned long * zones_size,
>> unsigned long zone_start_pfn, unsigned long *zholes_size);
>> extern void free_initmem(void);
>>
>> +extern void walk_free_mem_block(void *opaque,
>> + int min_order,
>> + bool (*report_pfn_range)(void *opaque,
>> + unsigned long pfn,
>> + unsigned long num));
>> +
>> /*
>> * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)
>> * into the buddy system. The freed pages will be poisoned with pattern
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 76c9688..705de22 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -4899,6 +4899,97 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
>> show_swap_cache_info();
>> }
>>
>> +/*
>> + * Walk through a free page list and report the found pfn range via the
>> + * callback.
>> + *
>> + * Return false if the callback requests to stop reporting. Otherwise,
>> + * return true.
>> + */
>> +static bool walk_free_page_list(void *opaque,
>> + struct zone *zone,
>> + int order,
>> + enum migratetype mt,
>> + bool (*report_pfn_range)(void *,
>> + unsigned long,
>> + unsigned long))
>> +{
>> + struct page *page;
>> + struct list_head *list;
>> + unsigned long pfn, flags;
>> + bool ret;
>> +
>> + spin_lock_irqsave(&zone->lock, flags);
>> + list = &zone->free_area[order].free_list[mt];
>> + list_for_each_entry(page, list, lru) {
>> + pfn = page_to_pfn(page);
>> + ret = report_pfn_range(opaque, pfn, 1 << order);
>> + if (!ret)
>> + break;
>> + }
>> + spin_unlock_irqrestore(&zone->lock, flags);
>> +
>> + return ret;
>> +}
> There are two issues with this API. One is that it is not
> restarteable: if you return false, you start from the
> beginning. So no way to drop lock, do something slow
> and then proceed.
>
> Another is that you are using it to report free page hints. Presumably
> the point is to drop these pages - keeping them near head of the list
> and reusing the reported ones will just make everything slower
> invalidating the hint.
>
> How about rotating these pages towards the end of the list?
> Probably not on each call, callect reported pages and then
> move them to tail when we exit.
I'm not sure how this would help. For example, we have a list of 2M free
page blocks:
A-->B-->C-->D-->E-->F-->G--H
After reporting A and B, and put them to the end and exit, when the
caller comes back,
1) if the list remains unchanged, then it will be
C-->D-->E-->F-->G-->H-->A-->B
2) If worse, all the blocks have been split into smaller blocks and used
after the caller comes back.
where could we continue?
The reason to think about "restart" is the worry about the virtqueue is
full, right? But we've agreed that losing some hints to report isn't
important, and in practice, the virtqueue won't be full as the host side
is faster.
I'm concerned that actions on the free list may cause more controversy
though it might be safe to do from some aspect, and would be hard to end
debating. If possible, we could go with the most prudent approach for
now, and have more discussions in future improvement patches. What would
you think?
>
>
>> +
>> +/**
>> + * walk_free_mem_block - Walk through the free page blocks in the system
>> + * @opaque: the context passed from the caller
>> + * @min_order: the minimum order of free lists to check
>> + * @report_pfn_range: the callback to report the pfn range of the free pages
>> + *
>> + * If the callback returns false, stop iterating the list of free page blocks.
>> + * Otherwise, continue to report.
>> + *
>> + * Please note that there are no locking guarantees for the callback and
>> + * that the reported pfn range might be freed or disappear after the
>> + * callback returns so the caller has to be very careful how it is used.
>> + *
>> + * The callback itself must not sleep or perform any operations which would
>> + * require any memory allocations directly (not even GFP_NOWAIT/GFP_ATOMIC)
>> + * or via any lock dependency. It is generally advisable to implement
>> + * the callback as simple as possible and defer any heavy lifting to a
>> + * different context.
>> + *
>> + * There is no guarantee that each free range will be reported only once
>> + * during one walk_free_mem_block invocation.
>> + *
>> + * pfn_to_page on the given range is strongly discouraged and if there is
>> + * an absolute need for that make sure to contact MM people to discuss
>> + * potential problems.
>> + *
>> + * The function itself might sleep so it cannot be called from atomic
>> + * contexts.
>> + *
>> + * In general low orders tend to be very volatile and so it makes more
>> + * sense to query larger ones first for various optimizations which like
>> + * ballooning etc... This will reduce the overhead as well.
>> + */
>> +void walk_free_mem_block(void *opaque,
>> + int min_order,
>> + bool (*report_pfn_range)(void *opaque,
>> + unsigned long pfn,
>> + unsigned long num))
>> +{
>> + struct zone *zone;
>> + int order;
>> + enum migratetype mt;
>> + bool ret;
>> +
>> + for_each_populated_zone(zone) {
>> + for (order = MAX_ORDER - 1; order >= min_order; order--) {
>> + for (mt = 0; mt < MIGRATE_TYPES; mt++) {
>> + ret = walk_free_page_list(opaque, zone,
>> + order, mt,
>> + report_pfn_range);
>> + if (!ret)
>> + return;
>> + }
>> + }
>> + }
>> +}
>> +EXPORT_SYMBOL_GPL(walk_free_mem_block);
>> +
> I think callers need a way to
> 1. distinguish between completion and exit on error
The first one here has actually been achieved by v25, where
walk_free_mem_block returns 0 on completing the reporting, or a non-zero
value which is returned from the callback.
So the caller will detect errors via letting the callback to return
something.
Best,
Wei
^ permalink raw reply
* Re: [virtio-dev] Re: [PATCH v25 2/2] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
From: Michael S. Tsirkin @ 2018-01-26 2:42 UTC (permalink / raw)
To: Wei Wang
Cc: yang.zhang.wz, virtio-dev, riel, quan.xu0, kvm, nilal,
liliang.opensource, linux-kernel, mhocko, linux-mm, pbonzini,
akpm, virtualization
In-Reply-To: <5A6A871C.6040408@intel.com>
On Fri, Jan 26, 2018 at 09:40:44AM +0800, Wei Wang wrote:
> On 01/25/2018 09:49 PM, Michael S. Tsirkin wrote:
> > On Thu, Jan 25, 2018 at 05:14:06PM +0800, Wei Wang wrote:
> > > +
> > > +static void report_free_page_func(struct work_struct *work)
> > > +{
> > > + struct virtio_balloon *vb;
> > > + int ret;
> > > +
> > > + vb = container_of(work, struct virtio_balloon, report_free_page_work);
> > > +
> > > + /* Start by sending the received cmd id to host with an outbuf */
> > > + ret = send_cmd_id(vb, vb->cmd_id_received);
> > > + if (unlikely(ret))
> > > + goto err;
> > > +
> > > + ret = walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages);
> > > + if (unlikely(ret < 0))
> > > + goto err;
> > > +
> > > + /* End by sending a stop id to host with an outbuf */
> > > + ret = send_cmd_id(vb, VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID);
> > > + if (likely(!ret))
> > > + return;
> > > +err:
> > > + dev_err(&vb->vdev->dev, "%s failure: free page vq is broken\n",
> > > + __func__);
> > > +}
> > > +
> > So that's very simple, but it only works well if the whole
> > free list fits in the queue or host processes the queue faster
> > than the guest. What if it doesn't?
>
> This is the case that the virtqueue gets full, and I think we've agreed that
> this is an optimization feature and losing some hints to report isn't
> important, right?
>
> Actually, in the tests, there is no chance to see the ring is full. If we
> check the host patches that were shared before, the device side operation is
> quite simple, it just clears the related bits from the bitmap, and then
> continues to take entries from the virtqueue till the virtqueue gets empty.
>
>
> > If we had restartability you could just drop the lock
> > and wait for a vq interrupt to make more progress, which
> > would be better I think.
> >
>
> Restartability means that caller needs to record the state where it was when
> it stopped last time.
See my comment on the mm patch: if you rotate the previously reported
pages towards the end, then you mostly get restartability for free,
if only per zone.
The only thing remaining will be stopping at a page you already reported.
There aren't many zones so restartability wrt zones is kind of
trivial.
> The controversy is that the free list is not static
> once the lock is dropped, so everything is dynamically changing, including
> the state that was recorded. The method we are using is more prudent, IMHO.
> How about taking the fundamental solution, and seek to improve incrementally
> in the future?
>
>
> Best,
> Wei
I'd like to see kicks happen outside the spinlock. kick with a spinlock
taken looks like a scalability issue that won't be easy to
reproduce but hurt workloads at random unexpected times.
--
MST
^ permalink raw reply
* Re: [PATCH v25 1/2 RESEND] mm: support reporting free page blocks
From: Wei Wang @ 2018-01-26 2:11 UTC (permalink / raw)
To: Andrew Morton
Cc: yang.zhang.wz, virtio-dev, riel, quan.xu0, kvm, mst, nilal,
liliang.opensource, linux-kernel, virtualization, linux-mm,
pbonzini, mhocko
In-Reply-To: <20180125144124.7e9f6e2156b1b940b07aecfc@linux-foundation.org>
On 01/26/2018 06:41 AM, Andrew Morton wrote:
> On Thu, 25 Jan 2018 17:38:27 +0800 Wei Wang <wei.w.wang@intel.com> wrote:
>
>> This patch adds support to walk through the free page blocks in the
>> system and report them via a callback function. Some page blocks may
>> leave the free list after zone->lock is released, so it is the caller's
>> responsibility to either detect or prevent the use of such pages.
>>
>> One use example of this patch is to accelerate live migration by skipping
>> the transfer of free pages reported from the guest. A popular method used
>> by the hypervisor to track which part of memory is written during live
>> migration is to write-protect all the guest memory. So, those pages that
>> are reported as free pages but are written after the report function
>> returns will be captured by the hypervisor, and they will be added to the
>> next round of memory transfer.
> It would be useful if we had some quantitative testing results, so we
> can see the real-world benefits from this change?
>
Sure. Thanks for the reminder, I think I'll also attach this to the
cover letter:
Without this feature, locally live migrating an 8G idle guest takes
~2286 ms. With this featrue, it takes ~260 ms, which reduces the
migration time to ~11%.
Idle guest means a guest which doesn't run any specific workloads after
boots. The improvement depends on how much free memory the guest has,
idle guest is a good case to show the improvement. From the optimization
point of view, having something is better than nothing, IMHO. If the
guest has less free memory, the improvement will be less, but still
better than no improvement.
Best,
Wei
^ permalink raw reply
* Re: [virtio-dev] Re: [PATCH v25 2/2] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
From: Wei Wang @ 2018-01-26 1:40 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: yang.zhang.wz, virtio-dev, riel, quan.xu0, kvm, nilal,
liliang.opensource, linux-kernel, mhocko, linux-mm, pbonzini,
akpm, virtualization
In-Reply-To: <20180125154708-mutt-send-email-mst@kernel.org>
On 01/25/2018 09:49 PM, Michael S. Tsirkin wrote:
> On Thu, Jan 25, 2018 at 05:14:06PM +0800, Wei Wang wrote:
>> +
>> +static void report_free_page_func(struct work_struct *work)
>> +{
>> + struct virtio_balloon *vb;
>> + int ret;
>> +
>> + vb = container_of(work, struct virtio_balloon, report_free_page_work);
>> +
>> + /* Start by sending the received cmd id to host with an outbuf */
>> + ret = send_cmd_id(vb, vb->cmd_id_received);
>> + if (unlikely(ret))
>> + goto err;
>> +
>> + ret = walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages);
>> + if (unlikely(ret < 0))
>> + goto err;
>> +
>> + /* End by sending a stop id to host with an outbuf */
>> + ret = send_cmd_id(vb, VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID);
>> + if (likely(!ret))
>> + return;
>> +err:
>> + dev_err(&vb->vdev->dev, "%s failure: free page vq is broken\n",
>> + __func__);
>> +}
>> +
> So that's very simple, but it only works well if the whole
> free list fits in the queue or host processes the queue faster
> than the guest. What if it doesn't?
This is the case that the virtqueue gets full, and I think we've agreed
that this is an optimization feature and losing some hints to report
isn't important, right?
Actually, in the tests, there is no chance to see the ring is full. If
we check the host patches that were shared before, the device side
operation is quite simple, it just clears the related bits from the
bitmap, and then continues to take entries from the virtqueue till the
virtqueue gets empty.
> If we had restartability you could just drop the lock
> and wait for a vq interrupt to make more progress, which
> would be better I think.
>
Restartability means that caller needs to record the state where it was
when it stopped last time. The controversy is that the free list is not
static once the lock is dropped, so everything is dynamically changing,
including the state that was recorded. The method we are using is more
prudent, IMHO. How about taking the fundamental solution, and seek to
improve incrementally in the future?
Best,
Wei
^ permalink raw reply
* [PATCH net-next 12/12] tools/virtio: fix smp_mb on x86
From: Michael S. Tsirkin @ 2018-01-25 23:36 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, John Fastabend, David Miller, virtualization
In-Reply-To: <1516923320-16959-1-git-send-email-mst@redhat.com>
Offset 128 overlaps the last word of the redzone.
Use 132 which is always beyond that.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
tools/virtio/ringtest/main.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 593a328..301d59b 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -111,7 +111,7 @@ static inline void busy_wait(void)
}
#if defined(__x86_64__) || defined(__i386__)
-#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
#else
/*
* Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
--
MST
^ permalink raw reply related
* [PATCH net-next 11/12] tools/virtio: copy READ/WRITE_ONCE
From: Michael S. Tsirkin @ 2018-01-25 23:36 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, John Fastabend, David Miller, virtualization
In-Reply-To: <1516923320-16959-1-git-send-email-mst@redhat.com>
This is to make ptr_ring test build again.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
tools/virtio/ringtest/main.h | 57 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 57 insertions(+)
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 5706e07..593a328 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -134,4 +134,61 @@ static inline void busy_wait(void)
barrier(); \
} while (0)
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
+#define smp_wmb() barrier()
+#else
+#define smp_wmb() smp_release()
+#endif
+
+#ifdef __alpha__
+#define smp_read_barrier_depends() smp_acquire()
+#else
+#define smp_read_barrier_depends() do {} while(0)
+#endif
+
+static __always_inline
+void __read_once_size(const volatile void *p, void *res, int size)
+{
+ switch (size) { \
+ case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \
+ case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \
+ case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \
+ case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \
+ default: \
+ barrier(); \
+ __builtin_memcpy((void *)res, (const void *)p, size); \
+ barrier(); \
+ } \
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
+ case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
+ case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
+ case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ barrier();
+ }
+}
+
+#define READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
+ smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+ __u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__val = (typeof(x)) (val) }; \
+ __write_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
#endif
--
MST
^ permalink raw reply related
* [PATCH net-next 10/12] tools/virtio: more stubs to fix tools build
From: Michael S. Tsirkin @ 2018-01-25 23:36 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, John Fastabend, David Miller, virtualization
In-Reply-To: <1516923320-16959-1-git-send-email-mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
tools/virtio/linux/kernel.h | 2 +-
tools/virtio/linux/thread_info.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
create mode 100644 tools/virtio/linux/thread_info.h
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 395521a..fca8381 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr)
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
-#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n"))
+#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0)
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h
new file mode 100644
index 0000000..e0f610d
--- /dev/null
+++ b/tools/virtio/linux/thread_info.h
@@ -0,0 +1 @@
+#define check_copy_size(A, B, C) (1)
--
MST
^ permalink raw reply related
* [PATCH net-next 09/12] tools/virtio: switch to __ptr_ring_empty
From: Michael S. Tsirkin @ 2018-01-25 23:36 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, John Fastabend, David Miller, virtualization
In-Reply-To: <1516923320-16959-1-git-send-email-mst@redhat.com>
We don't rely on lockless guarantees, but it
seems cleaner than inverting __ptr_ring_peek.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
tools/virtio/ringtest/ptr_ring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index e6e8130..477899c 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -187,7 +187,7 @@ bool enable_kick()
bool avail_empty()
{
- return !__ptr_ring_peek(&array);
+ return __ptr_ring_empty(&array);
}
bool use_buf(unsigned *lenp, void **bufp)
--
MST
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox