From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Michael S. Tsirkin" Subject: [PATCH RFC v8 01/11] vhost: option to fetch descriptors through an independent struct Date: Thu, 11 Jun 2020 07:34:16 -0400 Message-ID: <20200611113404.17810-2-mst@redhat.com> References: <20200611113404.17810-1-mst@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Return-path: In-Reply-To: <20200611113404.17810-1-mst@redhat.com> Content-Disposition: inline List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: virtualization-bounces@lists.linux-foundation.org Sender: "Virtualization" To: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org, eperezma@redhat.com, kvm@vger.kernel.org, virtualization@lists.linux-foundation.org List-Id: virtualization@lists.linuxfoundation.org VGhlIGlkZWEgaXMgdG8gc3VwcG9ydCBtdWx0aXBsZSByaW5nIGZvcm1hdHMgYnkgY29udmVydGlu Zwp0byBhIGZvcm1hdC1pbmRlcGVuZGVudCBhcnJheSBvZiBkZXNjcmlwdG9ycy4KClRoaXMgY29z dHMgZXh0cmEgY3ljbGVzLCBidXQgd2UgZ2FpbiBpbiBhYmlsaXR5CnRvIGZldGNoIGEgYmF0Y2gg b2YgZGVzY3JpcHRvcnMgaW4gb25lIGdvLCB3aGljaAppcyBnb29kIGZvciBjb2RlIGNhY2hlIGxv Y2FsaXR5LgoKV2hlbiB1c2VkLCB0aGlzIGNhdXNlcyBhIG1pbm9yIHBlcmZvcm1hbmNlIGRlZ3Jh ZGF0aW9uLAppdCdzIGJlZW4ga2VwdCBhcyBzaW1wbGUgYXMgcG9zc2libGUgZm9yIGVhc2Ugb2Yg cmV2aWV3LgpBIGZvbGxvdy11cCBwYXRjaCBnZXRzIHVzIGJhY2sgdGhlIHBlcmZvcm1hbmNlIGJ5 IGFkZGluZyBiYXRjaGluZy4KClRvIHNpbXBsaWZ5IGJlbmNobWFya2luZywgSSBrZXB0IHRoZSBv bGQgY29kZSBhcm91bmQgc28gb25lIGNhbiBzd2l0Y2gKYmFjayBhbmQgZm9ydGggYmV0d2VlbiBv bGQgYW5kIG5ldyBjb2RlLiBUaGlzIHdpbGwgZ28gYXdheSBpbiB0aGUgZmluYWwKc3VibWlzc2lv bi4KClNpZ25lZC1vZmYtYnk6IE1pY2hhZWwgUy4gVHNpcmtpbiA8bXN0QHJlZGhhdC5jb20+ClNp Z25lZC1vZmYtYnk6IEV1Z2VuaW8gUMOpcmV6IDxlcGVyZXptYUByZWRoYXQuY29tPgpMaW5rOiBo dHRwczovL2xvcmUua2VybmVsLm9yZy9yLzIwMjAwNDAxMTgzMTE4LjgzMzQtMi1lcGVyZXptYUBy ZWRoYXQuY29tClNpZ25lZC1vZmYtYnk6IE1pY2hhZWwgUy4gVHNpcmtpbiA8bXN0QHJlZGhhdC5j b20+Ci0tLQogZHJpdmVycy92aG9zdC92aG9zdC5jIHwgMzA1ICsrKysrKysrKysrKysrKysrKysr KysrKysrKysrKysrKysrKysrKysrLQogZHJpdmVycy92aG9zdC92aG9zdC5oIHwgIDE2ICsrKwog MiBmaWxlcyBjaGFuZ2VkLCAzMjAgaW5zZXJ0aW9ucygrKSwgMSBkZWxldGlvbigtKQoKZGlmZiAt LWdpdCBhL2RyaXZlcnMvdmhvc3Qvdmhvc3QuYyBiL2RyaXZlcnMvdmhvc3Qvdmhvc3QuYwppbmRl eCAxNzJkYTA5MjEwN2UuLjExNDMzZDcwOTY1MSAxMDA2NDQKLS0tIGEvZHJpdmVycy92aG9zdC92 aG9zdC5jCisrKyBiL2RyaXZlcnMvdmhvc3Qvdmhvc3QuYwpAQCAtMzAzLDYgKzMwMyw3IEBAIHN0 YXRpYyB2b2lkIHZob3N0X3ZxX3Jlc2V0KHN0cnVjdCB2aG9zdF9kZXYgKmRldiwKIAkJCSAgIHN0 cnVjdCB2aG9zdF92aXJ0cXVldWUgKnZxKQogewogCXZxLT5udW0gPSAxOworCXZxLT5uZGVzY3Mg PSAwOwogCXZxLT5kZXNjID0gTlVMTDsKIAl2cS0+YXZhaWwgPSBOVUxMOwogCXZxLT51c2VkID0g TlVMTDsKQEAgLTM3Myw2ICszNzQsOSBAQCBzdGF0aWMgaW50IHZob3N0X3dvcmtlcih2b2lkICpk YXRhKQogCiBzdGF0aWMgdm9pZCB2aG9zdF92cV9mcmVlX2lvdmVjcyhzdHJ1Y3Qgdmhvc3Rfdmly dHF1ZXVlICp2cSkKIHsKKwlrZnJlZSh2cS0+ZGVzY3MpOworCXZxLT5kZXNjcyA9IE5VTEw7CisJ dnEtPm1heF9kZXNjcyA9IDA7CiAJa2ZyZWUodnEtPmluZGlyZWN0KTsKIAl2cS0+aW5kaXJlY3Qg PSBOVUxMOwogCWtmcmVlKHZxLT5sb2cpOwpAQCAtMzg5LDYgKzM5MywxMCBAQCBzdGF0aWMgbG9u ZyB2aG9zdF9kZXZfYWxsb2NfaW92ZWNzKHN0cnVjdCB2aG9zdF9kZXYgKmRldikKIAogCWZvciAo aSA9IDA7IGkgPCBkZXYtPm52cXM7ICsraSkgewogCQl2cSA9IGRldi0+dnFzW2ldOworCQl2cS0+ bWF4X2Rlc2NzID0gZGV2LT5pb3ZfbGltaXQ7CisJCXZxLT5kZXNjcyA9IGttYWxsb2NfYXJyYXko dnEtPm1heF9kZXNjcywKKwkJCQkJICBzaXplb2YoKnZxLT5kZXNjcyksCisJCQkJCSAgR0ZQX0tF Uk5FTCk7CiAJCXZxLT5pbmRpcmVjdCA9IGttYWxsb2NfYXJyYXkoVUlPX01BWElPViwKIAkJCQkJ ICAgICBzaXplb2YoKnZxLT5pbmRpcmVjdCksCiAJCQkJCSAgICAgR0ZQX0tFUk5FTCk7CkBAIC0z OTYsNyArNDA0LDcgQEAgc3RhdGljIGxvbmcgdmhvc3RfZGV2X2FsbG9jX2lvdmVjcyhzdHJ1Y3Qg dmhvc3RfZGV2ICpkZXYpCiAJCQkJCUdGUF9LRVJORUwpOwogCQl2cS0+aGVhZHMgPSBrbWFsbG9j X2FycmF5KGRldi0+aW92X2xpbWl0LCBzaXplb2YoKnZxLT5oZWFkcyksCiAJCQkJCSAgR0ZQX0tF Uk5FTCk7Ci0JCWlmICghdnEtPmluZGlyZWN0IHx8ICF2cS0+bG9nIHx8ICF2cS0+aGVhZHMpCisJ CWlmICghdnEtPmluZGlyZWN0IHx8ICF2cS0+bG9nIHx8ICF2cS0+aGVhZHMgfHwgIXZxLT5kZXNj cykKIAkJCWdvdG8gZXJyX25vbWVtOwogCX0KIAlyZXR1cm4gMDsKQEAgLTQ4OCw2ICs0OTYsOCBA QCB2b2lkIHZob3N0X2Rldl9pbml0KHN0cnVjdCB2aG9zdF9kZXYgKmRldiwKIAogCWZvciAoaSA9 IDA7IGkgPCBkZXYtPm52cXM7ICsraSkgewogCQl2cSA9IGRldi0+dnFzW2ldOworCQl2cS0+ZGVz Y3MgPSBOVUxMOworCQl2cS0+bWF4X2Rlc2NzID0gMDsKIAkJdnEtPmxvZyA9IE5VTEw7CiAJCXZx LT5pbmRpcmVjdCA9IE5VTEw7CiAJCXZxLT5oZWFkcyA9IE5VTEw7CkBAIC0yMzE1LDYgKzIzMjUs Mjk5IEBAIGludCB2aG9zdF9nZXRfdnFfZGVzYyhzdHJ1Y3Qgdmhvc3RfdmlydHF1ZXVlICp2cSwK IH0KIEVYUE9SVF9TWU1CT0xfR1BMKHZob3N0X2dldF92cV9kZXNjKTsKIAorc3RhdGljIHN0cnVj dCB2aG9zdF9kZXNjICpwZWVrX3NwbGl0X2Rlc2Moc3RydWN0IHZob3N0X3ZpcnRxdWV1ZSAqdnEp Cit7CisJQlVHX09OKCF2cS0+bmRlc2NzKTsKKwlyZXR1cm4gJnZxLT5kZXNjc1t2cS0+bmRlc2Nz IC0gMV07Cit9CisKK3N0YXRpYyB2b2lkIHBvcF9zcGxpdF9kZXNjKHN0cnVjdCB2aG9zdF92aXJ0 cXVldWUgKnZxKQoreworCUJVR19PTighdnEtPm5kZXNjcyk7CisJLS12cS0+bmRlc2NzOworfQor CisjZGVmaW5lIFZIT1NUX0RFU0NfRkxBR1MgKFZSSU5HX0RFU0NfRl9JTkRJUkVDVCB8IFZSSU5H X0RFU0NfRl9XUklURSB8IFwKKwkJCSAgVlJJTkdfREVTQ19GX05FWFQpCitzdGF0aWMgaW50IHB1 c2hfc3BsaXRfZGVzYyhzdHJ1Y3Qgdmhvc3RfdmlydHF1ZXVlICp2cSwgc3RydWN0IHZyaW5nX2Rl c2MgKmRlc2MsIHUxNiBpZCkKK3sKKwlzdHJ1Y3Qgdmhvc3RfZGVzYyAqaDsKKworCWlmICh1bmxp a2VseSh2cS0+bmRlc2NzID49IHZxLT5tYXhfZGVzY3MpKQorCQlyZXR1cm4gLUVJTlZBTDsKKwlo ID0gJnZxLT5kZXNjc1t2cS0+bmRlc2NzKytdOworCWgtPmFkZHIgPSB2aG9zdDY0X3RvX2NwdSh2 cSwgZGVzYy0+YWRkcik7CisJaC0+bGVuID0gdmhvc3QzMl90b19jcHUodnEsIGRlc2MtPmxlbik7 CisJaC0+ZmxhZ3MgPSB2aG9zdDE2X3RvX2NwdSh2cSwgZGVzYy0+ZmxhZ3MpICYgVkhPU1RfREVT Q19GTEFHUzsKKwloLT5pZCA9IGlkOworCisJcmV0dXJuIDA7Cit9CisKK3N0YXRpYyBpbnQgZmV0 Y2hfaW5kaXJlY3RfZGVzY3Moc3RydWN0IHZob3N0X3ZpcnRxdWV1ZSAqdnEsCisJCQkJc3RydWN0 IHZob3N0X2Rlc2MgKmluZGlyZWN0LAorCQkJCXUxNiBoZWFkKQoreworCXN0cnVjdCB2cmluZ19k ZXNjIGRlc2M7CisJdW5zaWduZWQgaW50IGkgPSAwLCBjb3VudCwgZm91bmQgPSAwOworCXUzMiBs ZW4gPSBpbmRpcmVjdC0+bGVuOworCXN0cnVjdCBpb3ZfaXRlciBmcm9tOworCWludCByZXQ7CisK KwkvKiBTYW5pdHkgY2hlY2sgKi8KKwlpZiAodW5saWtlbHkobGVuICUgc2l6ZW9mIGRlc2MpKSB7 CisJCXZxX2Vycih2cSwgIkludmFsaWQgbGVuZ3RoIGluIGluZGlyZWN0IGRlc2NyaXB0b3I6ICIK KwkJICAgICAgICJsZW4gMHglbGx4IG5vdCBtdWx0aXBsZSBvZiAweCV6eFxuIiwKKwkJICAgICAg ICh1bnNpZ25lZCBsb25nIGxvbmcpbGVuLAorCQkgICAgICAgc2l6ZW9mIGRlc2MpOworCQlyZXR1 cm4gLUVJTlZBTDsKKwl9CisKKwlyZXQgPSB0cmFuc2xhdGVfZGVzYyh2cSwgaW5kaXJlY3QtPmFk ZHIsIGxlbiwgdnEtPmluZGlyZWN0LAorCQkJICAgICBVSU9fTUFYSU9WLCBWSE9TVF9BQ0NFU1Nf Uk8pOworCWlmICh1bmxpa2VseShyZXQgPCAwKSkgeworCQlpZiAocmV0ICE9IC1FQUdBSU4pCisJ CQl2cV9lcnIodnEsICJUcmFuc2xhdGlvbiBmYWlsdXJlICVkIGluIGluZGlyZWN0LlxuIiwgcmV0 KTsKKwkJcmV0dXJuIHJldDsKKwl9CisJaW92X2l0ZXJfaW5pdCgmZnJvbSwgUkVBRCwgdnEtPmlu ZGlyZWN0LCByZXQsIGxlbik7CisKKwkvKiBXZSB3aWxsIHVzZSB0aGUgcmVzdWx0IGFzIGFuIGFk ZHJlc3MgdG8gcmVhZCBmcm9tLCBzbyBtb3N0CisJICogYXJjaGl0ZWN0dXJlcyBvbmx5IG5lZWQg YSBjb21waWxlciBiYXJyaWVyIGhlcmUuICovCisJcmVhZF9iYXJyaWVyX2RlcGVuZHMoKTsKKwor CWNvdW50ID0gbGVuIC8gc2l6ZW9mIGRlc2M7CisJLyogQnVmZmVycyBhcmUgY2hhaW5lZCB2aWEg YSAxNiBiaXQgbmV4dCBmaWVsZCwgc28KKwkgKiB3ZSBjYW4gaGF2ZSBhdCBtb3N0IDJeMTYgb2Yg dGhlc2UuICovCisJaWYgKHVubGlrZWx5KGNvdW50ID4gVVNIUlRfTUFYICsgMSkpIHsKKwkJdnFf ZXJyKHZxLCAiSW5kaXJlY3QgYnVmZmVyIGxlbmd0aCB0b28gYmlnOiAlZFxuIiwKKwkJICAgICAg IGluZGlyZWN0LT5sZW4pOworCQlyZXR1cm4gLUUyQklHOworCX0KKwlpZiAodW5saWtlbHkodnEt Pm5kZXNjcyArIGNvdW50ID4gdnEtPm1heF9kZXNjcykpIHsKKwkJdnFfZXJyKHZxLCAiVG9vIG1h bnkgaW5kaXJlY3QgKyBkaXJlY3QgZGVzY3M6ICVkICsgJWRcbiIsCisJCSAgICAgICB2cS0+bmRl c2NzLCBpbmRpcmVjdC0+bGVuKTsKKwkJcmV0dXJuIC1FMkJJRzsKKwl9CisKKwlkbyB7CisJCWlm ICh1bmxpa2VseSgrK2ZvdW5kID4gY291bnQpKSB7CisJCQl2cV9lcnIodnEsICJMb29wIGRldGVj dGVkOiBsYXN0IG9uZSBhdCAldSAiCisJCQkgICAgICAgImluZGlyZWN0IHNpemUgJXVcbiIsCisJ CQkgICAgICAgaSwgY291bnQpOworCQkJcmV0dXJuIC1FSU5WQUw7CisJCX0KKwkJaWYgKHVubGlr ZWx5KCFjb3B5X2Zyb21faXRlcl9mdWxsKCZkZXNjLCBzaXplb2YoZGVzYyksICZmcm9tKSkpIHsK KwkJCXZxX2Vycih2cSwgIkZhaWxlZCBpbmRpcmVjdCBkZXNjcmlwdG9yOiBpZHggJWQsICV6eFxu IiwKKwkJCSAgICAgICBpLCAoc2l6ZV90KWluZGlyZWN0LT5hZGRyICsgaSAqIHNpemVvZiBkZXNj KTsKKwkJCXJldHVybiAtRUlOVkFMOworCQl9CisJCWlmICh1bmxpa2VseShkZXNjLmZsYWdzICYg Y3B1X3RvX3Zob3N0MTYodnEsIFZSSU5HX0RFU0NfRl9JTkRJUkVDVCkpKSB7CisJCQl2cV9lcnIo dnEsICJOZXN0ZWQgaW5kaXJlY3QgZGVzY3JpcHRvcjogaWR4ICVkLCAlenhcbiIsCisJCQkgICAg ICAgaSwgKHNpemVfdClpbmRpcmVjdC0+YWRkciArIGkgKiBzaXplb2YgZGVzYyk7CisJCQlyZXR1 cm4gLUVJTlZBTDsKKwkJfQorCisJCS8qIE5vdGU6IHB1c2hfc3BsaXRfZGVzYyBjYW4ndCBmYWls IGhlcmU6CisJCSAqIHdlIG5ldmVyIGZldGNoIHVubGVzcyB0aGVyZSdzIHNwYWNlLiAqLworCQly ZXQgPSBwdXNoX3NwbGl0X2Rlc2ModnEsICZkZXNjLCBoZWFkKTsKKwkJV0FSTl9PTihyZXQpOwor CX0gd2hpbGUgKChpID0gbmV4dF9kZXNjKHZxLCAmZGVzYykpICE9IC0xKTsKKwlyZXR1cm4gMDsK K30KKworLyogVGhpcyBmdW5jdGlvbiByZXR1cm5zIGEgdmFsdWUgPiAwIGlmIGEgZGVzY3JpcHRv ciB3YXMgZm91bmQsIG9yIDAgaWYgbm9uZSB3ZXJlIGZvdW5kLgorICogQSBuZWdhdGl2ZSBjb2Rl IGlzIHJldHVybmVkIG9uIGVycm9yLiAqLworc3RhdGljIGludCBmZXRjaF9kZXNjcyhzdHJ1Y3Qg dmhvc3RfdmlydHF1ZXVlICp2cSkKK3sKKwl1bnNpZ25lZCBpbnQgaSwgaGVhZCwgZm91bmQgPSAw OworCXN0cnVjdCB2aG9zdF9kZXNjICpsYXN0OworCXN0cnVjdCB2cmluZ19kZXNjIGRlc2M7CisJ X192aXJ0aW8xNiBhdmFpbF9pZHg7CisJX192aXJ0aW8xNiByaW5nX2hlYWQ7CisJdTE2IGxhc3Rf YXZhaWxfaWR4OworCWludCByZXQ7CisKKwkvKiBDaGVjayBpdCBpc24ndCBkb2luZyB2ZXJ5IHN0 cmFuZ2UgdGhpbmdzIHdpdGggZGVzY3JpcHRvciBudW1iZXJzLiAqLworCWxhc3RfYXZhaWxfaWR4 ID0gdnEtPmxhc3RfYXZhaWxfaWR4OworCisJaWYgKHZxLT5hdmFpbF9pZHggPT0gdnEtPmxhc3Rf YXZhaWxfaWR4KSB7CisJCWlmICh1bmxpa2VseSh2aG9zdF9nZXRfYXZhaWxfaWR4KHZxLCAmYXZh aWxfaWR4KSkpIHsKKwkJCXZxX2Vycih2cSwgIkZhaWxlZCB0byBhY2Nlc3MgYXZhaWwgaWR4IGF0 ICVwXG4iLAorCQkJCSZ2cS0+YXZhaWwtPmlkeCk7CisJCQlyZXR1cm4gLUVGQVVMVDsKKwkJfQor CQl2cS0+YXZhaWxfaWR4ID0gdmhvc3QxNl90b19jcHUodnEsIGF2YWlsX2lkeCk7CisKKwkJaWYg KHVubGlrZWx5KCh1MTYpKHZxLT5hdmFpbF9pZHggLSBsYXN0X2F2YWlsX2lkeCkgPiB2cS0+bnVt KSkgeworCQkJdnFfZXJyKHZxLCAiR3Vlc3QgbW92ZWQgdXNlZCBpbmRleCBmcm9tICV1IHRvICV1 IiwKKwkJCQlsYXN0X2F2YWlsX2lkeCwgdnEtPmF2YWlsX2lkeCk7CisJCQlyZXR1cm4gLUVGQVVM VDsKKwkJfQorCisJCS8qIElmIHRoZXJlJ3Mgbm90aGluZyBuZXcgc2luY2UgbGFzdCB3ZSBsb29r ZWQsIHJldHVybgorCQkgKiBpbnZhbGlkLgorCQkgKi8KKwkJaWYgKHZxLT5hdmFpbF9pZHggPT0g bGFzdF9hdmFpbF9pZHgpCisJCQlyZXR1cm4gMDsKKworCQkvKiBPbmx5IGdldCBhdmFpbCByaW5n IGVudHJpZXMgYWZ0ZXIgdGhleSBoYXZlIGJlZW4KKwkJICogZXhwb3NlZCBieSBndWVzdC4KKwkJ ICovCisJCXNtcF9ybWIoKTsKKwl9CisKKwkvKiBHcmFiIHRoZSBuZXh0IGRlc2NyaXB0b3IgbnVt YmVyIHRoZXkncmUgYWR2ZXJ0aXNpbmcgKi8KKwlpZiAodW5saWtlbHkodmhvc3RfZ2V0X2F2YWls X2hlYWQodnEsICZyaW5nX2hlYWQsIGxhc3RfYXZhaWxfaWR4KSkpIHsKKwkJdnFfZXJyKHZxLCAi RmFpbGVkIHRvIHJlYWQgaGVhZDogaWR4ICVkIGFkZHJlc3MgJXBcbiIsCisJCSAgICAgICBsYXN0 X2F2YWlsX2lkeCwKKwkJICAgICAgICZ2cS0+YXZhaWwtPnJpbmdbbGFzdF9hdmFpbF9pZHggJSB2 cS0+bnVtXSk7CisJCXJldHVybiAtRUZBVUxUOworCX0KKworCWhlYWQgPSB2aG9zdDE2X3RvX2Nw dSh2cSwgcmluZ19oZWFkKTsKKworCS8qIElmIHRoZWlyIG51bWJlciBpcyBzaWxseSwgdGhhdCdz IGFuIGVycm9yLiAqLworCWlmICh1bmxpa2VseShoZWFkID49IHZxLT5udW0pKSB7CisJCXZxX2Vy cih2cSwgIkd1ZXN0IHNheXMgaW5kZXggJXUgPiAldSBpcyBhdmFpbGFibGUiLAorCQkgICAgICAg aGVhZCwgdnEtPm51bSk7CisJCXJldHVybiAtRUlOVkFMOworCX0KKworCWkgPSBoZWFkOworCWRv IHsKKwkJaWYgKHVubGlrZWx5KGkgPj0gdnEtPm51bSkpIHsKKwkJCXZxX2Vycih2cSwgIkRlc2Mg aW5kZXggaXMgJXUgPiAldSwgaGVhZCA9ICV1IiwKKwkJCSAgICAgICBpLCB2cS0+bnVtLCBoZWFk KTsKKwkJCXJldHVybiAtRUlOVkFMOworCQl9CisJCWlmICh1bmxpa2VseSgrK2ZvdW5kID4gdnEt Pm51bSkpIHsKKwkJCXZxX2Vycih2cSwgIkxvb3AgZGV0ZWN0ZWQ6IGxhc3Qgb25lIGF0ICV1ICIK KwkJCSAgICAgICAidnEgc2l6ZSAldSBoZWFkICV1XG4iLAorCQkJICAgICAgIGksIHZxLT5udW0s IGhlYWQpOworCQkJcmV0dXJuIC1FSU5WQUw7CisJCX0KKwkJcmV0ID0gdmhvc3RfZ2V0X2Rlc2Mo dnEsICZkZXNjLCBpKTsKKwkJaWYgKHVubGlrZWx5KHJldCkpIHsKKwkJCXZxX2Vycih2cSwgIkZh aWxlZCB0byBnZXQgZGVzY3JpcHRvcjogaWR4ICVkIGFkZHIgJXBcbiIsCisJCQkgICAgICAgaSwg dnEtPmRlc2MgKyBpKTsKKwkJCXJldHVybiAtRUZBVUxUOworCQl9CisJCXJldCA9IHB1c2hfc3Bs aXRfZGVzYyh2cSwgJmRlc2MsIGhlYWQpOworCQlpZiAodW5saWtlbHkocmV0KSkgeworCQkJdnFf ZXJyKHZxLCAiRmFpbGVkIHRvIHNhdmUgZGVzY3JpcHRvcjogaWR4ICVkXG4iLCBpKTsKKwkJCXJl dHVybiAtRUlOVkFMOworCQl9CisJfSB3aGlsZSAoKGkgPSBuZXh0X2Rlc2ModnEsICZkZXNjKSkg IT0gLTEpOworCisJbGFzdCA9IHBlZWtfc3BsaXRfZGVzYyh2cSk7CisJaWYgKHVubGlrZWx5KGxh c3QtPmZsYWdzICYgVlJJTkdfREVTQ19GX0lORElSRUNUKSkgeworCQlwb3Bfc3BsaXRfZGVzYyh2 cSk7CisJCXJldCA9IGZldGNoX2luZGlyZWN0X2Rlc2NzKHZxLCBsYXN0LCBoZWFkKTsKKwkJaWYg KHVubGlrZWx5KHJldCA8IDApKSB7CisJCQlpZiAocmV0ICE9IC1FQUdBSU4pCisJCQkJdnFfZXJy KHZxLCAiRmFpbHVyZSBkZXRlY3RlZCAiCisJCQkJICAgICAgICJpbiBpbmRpcmVjdCBkZXNjcmlw dG9yIGF0IGlkeCAlZFxuIiwgaGVhZCk7CisJCQlyZXR1cm4gcmV0OworCQl9CisJfQorCisJLyog QXNzdW1lIG5vdGlmaWNhdGlvbnMgZnJvbSBndWVzdCBhcmUgZGlzYWJsZWQgYXQgdGhpcyBwb2lu dCwKKwkgKiBpZiB0aGV5IGFyZW4ndCB3ZSB3b3VsZCBuZWVkIHRvIHVwZGF0ZSBhdmFpbF9ldmVu dCBpbmRleC4gKi8KKwlCVUdfT04oISh2cS0+dXNlZF9mbGFncyAmIFZSSU5HX1VTRURfRl9OT19O T1RJRlkpKTsKKworCS8qIE9uIHN1Y2Nlc3MsIGluY3JlbWVudCBhdmFpbCBpbmRleC4gKi8KKwl2 cS0+bGFzdF9hdmFpbF9pZHgrKzsKKworCXJldHVybiAxOworfQorCisvKiBUaGlzIGxvb2tzIGlu IHRoZSB2aXJ0cXVldWUgYW5kIGZvciB0aGUgZmlyc3QgYXZhaWxhYmxlIGJ1ZmZlciwgYW5kIGNv bnZlcnRzCisgKiBpdCB0byBhbiBpb3ZlYyBmb3IgY29udmVuaWVudCBhY2Nlc3MuICBTaW5jZSBk ZXNjcmlwdG9ycyBjb25zaXN0IG9mIHNvbWUKKyAqIG51bWJlciBvZiBvdXRwdXQgdGhlbiBzb21l IG51bWJlciBvZiBpbnB1dCBkZXNjcmlwdG9ycywgaXQncyBhY3R1YWxseSB0d28KKyAqIGlvdmVj cywgYnV0IHdlIHBhY2sgdGhlbSBpbnRvIG9uZSBhbmQgbm90ZSBob3cgbWFueSBvZiBlYWNoIHRo ZXJlIHdlcmUuCisgKgorICogVGhpcyBmdW5jdGlvbiByZXR1cm5zIHRoZSBkZXNjcmlwdG9yIG51 bWJlciBmb3VuZCwgb3IgdnEtPm51bSAod2hpY2ggaXMKKyAqIG5ldmVyIGEgdmFsaWQgZGVzY3Jp cHRvciBudW1iZXIpIGlmIG5vbmUgd2FzIGZvdW5kLiAgQSBuZWdhdGl2ZSBjb2RlIGlzCisgKiBy ZXR1cm5lZCBvbiBlcnJvci4gKi8KK2ludCB2aG9zdF9nZXRfdnFfZGVzY19iYXRjaChzdHJ1Y3Qg dmhvc3RfdmlydHF1ZXVlICp2cSwKKwkJICAgICAgc3RydWN0IGlvdmVjIGlvdltdLCB1bnNpZ25l ZCBpbnQgaW92X3NpemUsCisJCSAgICAgIHVuc2lnbmVkIGludCAqb3V0X251bSwgdW5zaWduZWQg aW50ICppbl9udW0sCisJCSAgICAgIHN0cnVjdCB2aG9zdF9sb2cgKmxvZywgdW5zaWduZWQgaW50 ICpsb2dfbnVtKQoreworCWludCByZXQgPSBmZXRjaF9kZXNjcyh2cSk7CisJaW50IGk7CisKKwlp ZiAocmV0IDw9IDApCisJCWdvdG8gZXJyX2ZldGNoOworCisJLyogTm93IGNvbnZlcnQgdG8gSU9W ICovCisJLyogV2hlbiB3ZSBzdGFydCB0aGVyZSBhcmUgbm9uZSBvZiBlaXRoZXIgaW5wdXQgbm9y IG91dHB1dC4gKi8KKwkqb3V0X251bSA9ICppbl9udW0gPSAwOworCWlmICh1bmxpa2VseShsb2cp KQorCQkqbG9nX251bSA9IDA7CisKKwlmb3IgKGkgPSAwOyBpIDwgdnEtPm5kZXNjczsgKytpKSB7 CisJCXVuc2lnbmVkIGlvdl9jb3VudCA9ICppbl9udW0gKyAqb3V0X251bTsKKwkJc3RydWN0IHZo b3N0X2Rlc2MgKmRlc2MgPSAmdnEtPmRlc2NzW2ldOworCQlpbnQgYWNjZXNzOworCisJCWlmIChk ZXNjLT5mbGFncyAmIH5WSE9TVF9ERVNDX0ZMQUdTKSB7CisJCQl2cV9lcnIodnEsICJVbmV4cGVj dGVkIGZsYWdzOiAweCV4IGF0IGRlc2NyaXB0b3IgaWQgMHgleFxuIiwKKwkJCSAgICAgICBkZXNj LT5mbGFncywgZGVzYy0+aWQpOworCQkJcmV0ID0gLUVJTlZBTDsKKwkJCWdvdG8gZXJyOworCQl9 CisJCWlmIChkZXNjLT5mbGFncyAmIFZSSU5HX0RFU0NfRl9XUklURSkKKwkJCWFjY2VzcyA9IFZI T1NUX0FDQ0VTU19XTzsKKwkJZWxzZQorCQkJYWNjZXNzID0gVkhPU1RfQUNDRVNTX1JPOworCQly ZXQgPSB0cmFuc2xhdGVfZGVzYyh2cSwgZGVzYy0+YWRkciwKKwkJCQkgICAgIGRlc2MtPmxlbiwg aW92ICsgaW92X2NvdW50LAorCQkJCSAgICAgaW92X3NpemUgLSBpb3ZfY291bnQsIGFjY2Vzcyk7 CisJCWlmICh1bmxpa2VseShyZXQgPCAwKSkgeworCQkJaWYgKHJldCAhPSAtRUFHQUlOKQorCQkJ CXZxX2Vycih2cSwgIlRyYW5zbGF0aW9uIGZhaWx1cmUgJWQgZGVzY3JpcHRvciBpZHggJWRcbiIs CisJCQkJCXJldCwgaSk7CisJCQlnb3RvIGVycjsKKwkJfQorCQlpZiAoYWNjZXNzID09IFZIT1NU X0FDQ0VTU19XTykgeworCQkJLyogSWYgdGhpcyBpcyBhbiBpbnB1dCBkZXNjcmlwdG9yLAorCQkJ ICogaW5jcmVtZW50IHRoYXQgY291bnQuICovCisJCQkqaW5fbnVtICs9IHJldDsKKwkJCWlmICh1 bmxpa2VseShsb2cgJiYgcmV0KSkgeworCQkJCWxvZ1sqbG9nX251bV0uYWRkciA9IGRlc2MtPmFk ZHI7CisJCQkJbG9nWypsb2dfbnVtXS5sZW4gPSBkZXNjLT5sZW47CisJCQkJKysqbG9nX251bTsK KwkJCX0KKwkJfSBlbHNlIHsKKwkJCS8qIElmIGl0J3MgYW4gb3V0cHV0IGRlc2NyaXB0b3IsIHRo ZXkncmUgYWxsIHN1cHBvc2VkCisJCQkgKiB0byBjb21lIGJlZm9yZSBhbnkgaW5wdXQgZGVzY3Jp cHRvcnMuICovCisJCQlpZiAodW5saWtlbHkoKmluX251bSkpIHsKKwkJCQl2cV9lcnIodnEsICJE ZXNjcmlwdG9yIGhhcyBvdXQgYWZ0ZXIgaW46ICIKKwkJCQkgICAgICAgImlkeCAlZFxuIiwgaSk7 CisJCQkJcmV0ID0gLUVJTlZBTDsKKwkJCQlnb3RvIGVycjsKKwkJCX0KKwkJCSpvdXRfbnVtICs9 IHJldDsKKwkJfQorCisJCXJldCA9IGRlc2MtPmlkOworCX0KKworCXZxLT5uZGVzY3MgPSAwOwor CisJcmV0dXJuIHJldDsKKworZXJyOgorCXZob3N0X2Rpc2NhcmRfdnFfZGVzYyh2cSwgMSk7Citl cnJfZmV0Y2g6CisJdnEtPm5kZXNjcyA9IDA7CisKKwlyZXR1cm4gcmV0ID8gcmV0IDogdnEtPm51 bTsKK30KK0VYUE9SVF9TWU1CT0xfR1BMKHZob3N0X2dldF92cV9kZXNjX2JhdGNoKTsKKwogLyog UmV2ZXJzZSB0aGUgZWZmZWN0IG9mIHZob3N0X2dldF92cV9kZXNjLiBVc2VmdWwgZm9yIGVycm9y IGhhbmRsaW5nLiAqLwogdm9pZCB2aG9zdF9kaXNjYXJkX3ZxX2Rlc2Moc3RydWN0IHZob3N0X3Zp cnRxdWV1ZSAqdnEsIGludCBuKQogewpkaWZmIC0tZ2l0IGEvZHJpdmVycy92aG9zdC92aG9zdC5o IGIvZHJpdmVycy92aG9zdC92aG9zdC5oCmluZGV4IGM4ZTk2YTA5NWQzYi4uODcwODlkNTE0OTBk IDEwMDY0NAotLS0gYS9kcml2ZXJzL3Zob3N0L3Zob3N0LmgKKysrIGIvZHJpdmVycy92aG9zdC92 aG9zdC5oCkBAIC02MCw2ICs2MCwxMyBAQCBlbnVtIHZob3N0X3VhZGRyX3R5cGUgewogCVZIT1NU X05VTV9BRERSUyA9IDMsCiB9OwogCitzdHJ1Y3Qgdmhvc3RfZGVzYyB7CisJdTY0IGFkZHI7CisJ dTMyIGxlbjsKKwl1MTYgZmxhZ3M7IC8qIFZSSU5HX0RFU0NfRl9XUklURSwgVlJJTkdfREVTQ19G X05FWFQgKi8KKwl1MTYgaWQ7Cit9OworCiAvKiBUaGUgdmlydHF1ZXVlIHN0cnVjdHVyZSBkZXNj cmliZXMgYSBxdWV1ZSBhdHRhY2hlZCB0byBhIGRldmljZS4gKi8KIHN0cnVjdCB2aG9zdF92aXJ0 cXVldWUgewogCXN0cnVjdCB2aG9zdF9kZXYgKmRldjsKQEAgLTcxLDYgKzc4LDExIEBAIHN0cnVj dCB2aG9zdF92aXJ0cXVldWUgewogCXZyaW5nX2F2YWlsX3QgX191c2VyICphdmFpbDsKIAl2cmlu Z191c2VkX3QgX191c2VyICp1c2VkOwogCWNvbnN0IHN0cnVjdCB2aG9zdF9pb3RsYl9tYXAgKm1l dGFfaW90bGJbVkhPU1RfTlVNX0FERFJTXTsKKworCXN0cnVjdCB2aG9zdF9kZXNjICpkZXNjczsK KwlpbnQgbmRlc2NzOworCWludCBtYXhfZGVzY3M7CisKIAlzdHJ1Y3QgZmlsZSAqa2ljazsKIAlz dHJ1Y3QgZXZlbnRmZF9jdHggKmNhbGxfY3R4OwogCXN0cnVjdCBldmVudGZkX2N0eCAqZXJyb3Jf Y3R4OwpAQCAtMTc3LDYgKzE4OSwxMCBAQCBsb25nIHZob3N0X3ZyaW5nX2lvY3RsKHN0cnVjdCB2 aG9zdF9kZXYgKmQsIHVuc2lnbmVkIGludCBpb2N0bCwgdm9pZCBfX3VzZXIgKmFyZwogYm9vbCB2 aG9zdF92cV9hY2Nlc3Nfb2soc3RydWN0IHZob3N0X3ZpcnRxdWV1ZSAqdnEpOwogYm9vbCB2aG9z dF9sb2dfYWNjZXNzX29rKHN0cnVjdCB2aG9zdF9kZXYgKik7CiAKK2ludCB2aG9zdF9nZXRfdnFf ZGVzY19iYXRjaChzdHJ1Y3Qgdmhvc3RfdmlydHF1ZXVlICosCisJCSAgICAgIHN0cnVjdCBpb3Zl YyBpb3ZbXSwgdW5zaWduZWQgaW50IGlvdl9jb3VudCwKKwkJICAgICAgdW5zaWduZWQgaW50ICpv dXRfbnVtLCB1bnNpZ25lZCBpbnQgKmluX251bSwKKwkJICAgICAgc3RydWN0IHZob3N0X2xvZyAq bG9nLCB1bnNpZ25lZCBpbnQgKmxvZ19udW0pOwogaW50IHZob3N0X2dldF92cV9kZXNjKHN0cnVj dCB2aG9zdF92aXJ0cXVldWUgKiwKIAkJICAgICAgc3RydWN0IGlvdmVjIGlvdltdLCB1bnNpZ25l ZCBpbnQgaW92X2NvdW50LAogCQkgICAgICB1bnNpZ25lZCBpbnQgKm91dF9udW0sIHVuc2lnbmVk IGludCAqaW5fbnVtLAotLSAKTVNUCgpfX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f X19fX19fX19fX19fXwpWaXJ0dWFsaXphdGlvbiBtYWlsaW5nIGxpc3QKVmlydHVhbGl6YXRpb25A bGlzdHMubGludXgtZm91bmRhdGlvbi5vcmcKaHR0cHM6Ly9saXN0cy5saW51eGZvdW5kYXRpb24u b3JnL21haWxtYW4vbGlzdGluZm8vdmlydHVhbGl6YXRpb24= From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-9.9 required=3.0 tests=DKIMWL_WL_HIGH,DKIM_SIGNED, DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_PATCH, MAILING_LIST_MULTI,SIGNED_OFF_BY,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id C6280C433DF for ; Thu, 11 Jun 2020 11:35:55 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 941AA2072F for ; Thu, 11 Jun 2020 11:35:55 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (1024-bit key) header.d=redhat.com header.i=@redhat.com header.b="II04Nj5z" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728054AbgFKLe3 (ORCPT ); Thu, 11 Jun 2020 07:34:29 -0400 Received: from us-smtp-1.mimecast.com ([207.211.31.81]:26589 "EHLO us-smtp-delivery-1.mimecast.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727869AbgFKLeY (ORCPT ); Thu, 11 Jun 2020 07:34:24 -0400 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1591875262; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=wjgEeFu3a+LIiqg+5XiliZ73/V0mxb4sydm2yHd7x/A=; b=II04Nj5zN8r7GmDe6yjU2bMDck1uuPXOicr4x31xmmiwAZ1XnpNko3g7OGTX2Bi+bhjLAv MioVrJOW+pEJeR5bAq1PM5vbUuwSKpaAOHOeIFzwCilvFbDjOgcy0eZQzeyT0h4gbe4enD uFDsDQbsfGL2xeg3FF8pjfx0pmYWiHo= Received: from mail-wm1-f70.google.com (mail-wm1-f70.google.com [209.85.128.70]) (Using TLS) by relay.mimecast.com with ESMTP id us-mta-273-5r2XbFd2PqKN291XudVHiA-1; Thu, 11 Jun 2020 07:34:20 -0400 X-MC-Unique: 5r2XbFd2PqKN291XudVHiA-1 Received: by mail-wm1-f70.google.com with SMTP id g84so1234621wmf.4 for ; Thu, 11 Jun 2020 04:34:20 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:date:from:to:cc:subject:message-id:references :mime-version:content-disposition:content-transfer-encoding :in-reply-to; bh=wjgEeFu3a+LIiqg+5XiliZ73/V0mxb4sydm2yHd7x/A=; b=A+fQj6TBO8cFcAbUl1jqcqQ9Ur/fpE5MT7LLHprO7fy1PH5VuD3t9aIOuiAeIDyLvA q1DUjyEsbItkuXPRqTcBDHP8/o/pDBCKxHFAd047tuIP6j6MHOEDQLmLBQnlhjq7MCUE ZZgSXO0HAnMJJFJFEojMrHguqwN1n8sB/jQtmVwKCsIDzLfBS0Zso+ovijJ1AzmG9Pjd qOMJHmIqUv8QeUPTqnDcq6xdIPV0aHxXbdp73hp3SrXDudm8UnEtSrIO4QGq9jhr+Fhv riPiQgm4K5OIPkCV1jKR+AUvE2i8+6IqXakMJ648dcfEza/rHx8rZjD3iCNY0SoWE2zO cX/w== X-Gm-Message-State: AOAM530GsjoE66jV5/B1RQdp217gVKbC0Ynbpv0QGas7F442At6GlGuw ATzNfzL440ITcPLV2iXjFX0TsJA41YmLmXc/F11YZ3BLOV84Ff58vDO+ETmVL4ug+j8e5L3DdQt CHxwzi5HwnD+G X-Received: by 2002:a1c:a1c5:: with SMTP id k188mr8099520wme.41.1591875259340; Thu, 11 Jun 2020 04:34:19 -0700 (PDT) X-Google-Smtp-Source: ABdhPJxJjTP+Jltfvil3r7Eu5Xs33+bjgt+vVMAo54RDN03ftXfSQjwjTV1cnO4g7LuNMX5q4+LB7g== X-Received: by 2002:a1c:a1c5:: with SMTP id k188mr8099486wme.41.1591875258829; Thu, 11 Jun 2020 04:34:18 -0700 (PDT) Received: from redhat.com (bzq-79-181-55-232.red.bezeqint.net. [79.181.55.232]) by smtp.gmail.com with ESMTPSA id f71sm3316834wmf.22.2020.06.11.04.34.17 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Thu, 11 Jun 2020 04:34:18 -0700 (PDT) Date: Thu, 11 Jun 2020 07:34:16 -0400 From: "Michael S. Tsirkin" To: linux-kernel@vger.kernel.org Cc: kvm@vger.kernel.org, virtualization@lists.linux-foundation.org, netdev@vger.kernel.org, Jason Wang , eperezma@redhat.com Subject: [PATCH RFC v8 01/11] vhost: option to fetch descriptors through an independent struct Message-ID: <20200611113404.17810-2-mst@redhat.com> References: <20200611113404.17810-1-mst@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline Content-Transfer-Encoding: 8bit In-Reply-To: <20200611113404.17810-1-mst@redhat.com> X-Mailer: git-send-email 2.27.0.106.g8ac3dc51b1 X-Mutt-Fcc: =sent Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org The idea is to support multiple ring formats by converting to a format-independent array of descriptors. This costs extra cycles, but we gain in ability to fetch a batch of descriptors in one go, which is good for code cache locality. When used, this causes a minor performance degradation, it's been kept as simple as possible for ease of review. A follow-up patch gets us back the performance by adding batching. To simplify benchmarking, I kept the old code around so one can switch back and forth between old and new code. This will go away in the final submission. Signed-off-by: Michael S. Tsirkin Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20200401183118.8334-2-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 305 +++++++++++++++++++++++++++++++++++++++++- drivers/vhost/vhost.h | 16 +++ 2 files changed, 320 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 172da092107e..11433d709651 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -303,6 +303,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { vq->num = 1; + vq->ndescs = 0; vq->desc = NULL; vq->avail = NULL; vq->used = NULL; @@ -373,6 +374,9 @@ static int vhost_worker(void *data) static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) { + kfree(vq->descs); + vq->descs = NULL; + vq->max_descs = 0; kfree(vq->indirect); vq->indirect = NULL; kfree(vq->log); @@ -389,6 +393,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; + vq->max_descs = dev->iov_limit; + vq->descs = kmalloc_array(vq->max_descs, + sizeof(*vq->descs), + GFP_KERNEL); vq->indirect = kmalloc_array(UIO_MAXIOV, sizeof(*vq->indirect), GFP_KERNEL); @@ -396,7 +404,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) GFP_KERNEL); vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads), GFP_KERNEL); - if (!vq->indirect || !vq->log || !vq->heads) + if (!vq->indirect || !vq->log || !vq->heads || !vq->descs) goto err_nomem; } return 0; @@ -488,6 +496,8 @@ void vhost_dev_init(struct vhost_dev *dev, for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; + vq->descs = NULL; + vq->max_descs = 0; vq->log = NULL; vq->indirect = NULL; vq->heads = NULL; @@ -2315,6 +2325,299 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, } EXPORT_SYMBOL_GPL(vhost_get_vq_desc); +static struct vhost_desc *peek_split_desc(struct vhost_virtqueue *vq) +{ + BUG_ON(!vq->ndescs); + return &vq->descs[vq->ndescs - 1]; +} + +static void pop_split_desc(struct vhost_virtqueue *vq) +{ + BUG_ON(!vq->ndescs); + --vq->ndescs; +} + +#define VHOST_DESC_FLAGS (VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE | \ + VRING_DESC_F_NEXT) +static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id) +{ + struct vhost_desc *h; + + if (unlikely(vq->ndescs >= vq->max_descs)) + return -EINVAL; + h = &vq->descs[vq->ndescs++]; + h->addr = vhost64_to_cpu(vq, desc->addr); + h->len = vhost32_to_cpu(vq, desc->len); + h->flags = vhost16_to_cpu(vq, desc->flags) & VHOST_DESC_FLAGS; + h->id = id; + + return 0; +} + +static int fetch_indirect_descs(struct vhost_virtqueue *vq, + struct vhost_desc *indirect, + u16 head) +{ + struct vring_desc desc; + unsigned int i = 0, count, found = 0; + u32 len = indirect->len; + struct iov_iter from; + int ret; + + /* Sanity check */ + if (unlikely(len % sizeof desc)) { + vq_err(vq, "Invalid length in indirect descriptor: " + "len 0x%llx not multiple of 0x%zx\n", + (unsigned long long)len, + sizeof desc); + return -EINVAL; + } + + ret = translate_desc(vq, indirect->addr, len, vq->indirect, + UIO_MAXIOV, VHOST_ACCESS_RO); + if (unlikely(ret < 0)) { + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d in indirect.\n", ret); + return ret; + } + iov_iter_init(&from, READ, vq->indirect, ret, len); + + /* We will use the result as an address to read from, so most + * architectures only need a compiler barrier here. */ + read_barrier_depends(); + + count = len / sizeof desc; + /* Buffers are chained via a 16 bit next field, so + * we can have at most 2^16 of these. */ + if (unlikely(count > USHRT_MAX + 1)) { + vq_err(vq, "Indirect buffer length too big: %d\n", + indirect->len); + return -E2BIG; + } + if (unlikely(vq->ndescs + count > vq->max_descs)) { + vq_err(vq, "Too many indirect + direct descs: %d + %d\n", + vq->ndescs, indirect->len); + return -E2BIG; + } + + do { + if (unlikely(++found > count)) { + vq_err(vq, "Loop detected: last one at %u " + "indirect size %u\n", + i, count); + return -EINVAL; + } + if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) { + vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) { + vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + + /* Note: push_split_desc can't fail here: + * we never fetch unless there's space. */ + ret = push_split_desc(vq, &desc, head); + WARN_ON(ret); + } while ((i = next_desc(vq, &desc)) != -1); + return 0; +} + +/* This function returns a value > 0 if a descriptor was found, or 0 if none were found. + * A negative code is returned on error. */ +static int fetch_descs(struct vhost_virtqueue *vq) +{ + unsigned int i, head, found = 0; + struct vhost_desc *last; + struct vring_desc desc; + __virtio16 avail_idx; + __virtio16 ring_head; + u16 last_avail_idx; + int ret; + + /* Check it isn't doing very strange things with descriptor numbers. */ + last_avail_idx = vq->last_avail_idx; + + if (vq->avail_idx == vq->last_avail_idx) { + if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) { + vq_err(vq, "Failed to access avail idx at %p\n", + &vq->avail->idx); + return -EFAULT; + } + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + + if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { + vq_err(vq, "Guest moved used index from %u to %u", + last_avail_idx, vq->avail_idx); + return -EFAULT; + } + + /* If there's nothing new since last we looked, return + * invalid. + */ + if (vq->avail_idx == last_avail_idx) + return 0; + + /* Only get avail ring entries after they have been + * exposed by guest. + */ + smp_rmb(); + } + + /* Grab the next descriptor number they're advertising */ + if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) { + vq_err(vq, "Failed to read head: idx %d address %p\n", + last_avail_idx, + &vq->avail->ring[last_avail_idx % vq->num]); + return -EFAULT; + } + + head = vhost16_to_cpu(vq, ring_head); + + /* If their number is silly, that's an error. */ + if (unlikely(head >= vq->num)) { + vq_err(vq, "Guest says index %u > %u is available", + head, vq->num); + return -EINVAL; + } + + i = head; + do { + if (unlikely(i >= vq->num)) { + vq_err(vq, "Desc index is %u > %u, head = %u", + i, vq->num, head); + return -EINVAL; + } + if (unlikely(++found > vq->num)) { + vq_err(vq, "Loop detected: last one at %u " + "vq size %u head %u\n", + i, vq->num, head); + return -EINVAL; + } + ret = vhost_get_desc(vq, &desc, i); + if (unlikely(ret)) { + vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", + i, vq->desc + i); + return -EFAULT; + } + ret = push_split_desc(vq, &desc, head); + if (unlikely(ret)) { + vq_err(vq, "Failed to save descriptor: idx %d\n", i); + return -EINVAL; + } + } while ((i = next_desc(vq, &desc)) != -1); + + last = peek_split_desc(vq); + if (unlikely(last->flags & VRING_DESC_F_INDIRECT)) { + pop_split_desc(vq); + ret = fetch_indirect_descs(vq, last, head); + if (unlikely(ret < 0)) { + if (ret != -EAGAIN) + vq_err(vq, "Failure detected " + "in indirect descriptor at idx %d\n", head); + return ret; + } + } + + /* Assume notifications from guest are disabled at this point, + * if they aren't we would need to update avail_event index. */ + BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); + + /* On success, increment avail index. */ + vq->last_avail_idx++; + + return 1; +} + +/* This looks in the virtqueue and for the first available buffer, and converts + * it to an iovec for convenient access. Since descriptors consist of some + * number of output then some number of input descriptors, it's actually two + * iovecs, but we pack them into one and note how many of each there were. + * + * This function returns the descriptor number found, or vq->num (which is + * never a valid descriptor number) if none was found. A negative code is + * returned on error. */ +int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) +{ + int ret = fetch_descs(vq); + int i; + + if (ret <= 0) + goto err_fetch; + + /* Now convert to IOV */ + /* When we start there are none of either input nor output. */ + *out_num = *in_num = 0; + if (unlikely(log)) + *log_num = 0; + + for (i = 0; i < vq->ndescs; ++i) { + unsigned iov_count = *in_num + *out_num; + struct vhost_desc *desc = &vq->descs[i]; + int access; + + if (desc->flags & ~VHOST_DESC_FLAGS) { + vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n", + desc->flags, desc->id); + ret = -EINVAL; + goto err; + } + if (desc->flags & VRING_DESC_F_WRITE) + access = VHOST_ACCESS_WO; + else + access = VHOST_ACCESS_RO; + ret = translate_desc(vq, desc->addr, + desc->len, iov + iov_count, + iov_size - iov_count, access); + if (unlikely(ret < 0)) { + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d descriptor idx %d\n", + ret, i); + goto err; + } + if (access == VHOST_ACCESS_WO) { + /* If this is an input descriptor, + * increment that count. */ + *in_num += ret; + if (unlikely(log && ret)) { + log[*log_num].addr = desc->addr; + log[*log_num].len = desc->len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (unlikely(*in_num)) { + vq_err(vq, "Descriptor has out after in: " + "idx %d\n", i); + ret = -EINVAL; + goto err; + } + *out_num += ret; + } + + ret = desc->id; + } + + vq->ndescs = 0; + + return ret; + +err: + vhost_discard_vq_desc(vq, 1); +err_fetch: + vq->ndescs = 0; + + return ret ? ret : vq->num; +} +EXPORT_SYMBOL_GPL(vhost_get_vq_desc_batch); + /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) { diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index c8e96a095d3b..87089d51490d 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -60,6 +60,13 @@ enum vhost_uaddr_type { VHOST_NUM_ADDRS = 3, }; +struct vhost_desc { + u64 addr; + u32 len; + u16 flags; /* VRING_DESC_F_WRITE, VRING_DESC_F_NEXT */ + u16 id; +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -71,6 +78,11 @@ struct vhost_virtqueue { vring_avail_t __user *avail; vring_used_t __user *used; const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; + + struct vhost_desc *descs; + int ndescs; + int max_descs; + struct file *kick; struct eventfd_ctx *call_ctx; struct eventfd_ctx *error_ctx; @@ -177,6 +189,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg bool vhost_vq_access_ok(struct vhost_virtqueue *vq); bool vhost_log_access_ok(struct vhost_dev *); +int vhost_get_vq_desc_batch(struct vhost_virtqueue *, + struct iovec iov[], unsigned int iov_count, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num); int vhost_get_vq_desc(struct vhost_virtqueue *, struct iovec iov[], unsigned int iov_count, unsigned int *out_num, unsigned int *in_num, -- MST