All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andreas Hindborg <nmi@metaspace.dk>
To: Benno Lossin <benno.lossin@proton.me>
Cc: "Jens Axboe" <axboe@kernel.dk>, "Christoph Hellwig" <hch@lst.de>,
	"Keith Busch" <kbusch@kernel.org>,
	"Damien Le Moal" <dlemoal@kernel.org>,
	"Bart Van Assche" <bvanassche@acm.org>,
	"Hannes Reinecke" <hare@suse.de>,
	"Ming Lei" <ming.lei@redhat.com>,
	"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>,
	"Andreas Hindborg" <a.hindborg@samsung.com>,
	"Wedson Almeida Filho" <wedsonaf@gmail.com>,
	"Greg KH" <gregkh@linuxfoundation.org>,
	"Matthew Wilcox" <willy@infradead.org>,
	"Miguel Ojeda" <ojeda@kernel.org>,
	"Alex Gaynor" <alex.gaynor@gmail.com>,
	"Boqun Feng" <boqun.feng@gmail.com>,
	"Gary Guo" <gary@garyguo.net>,
	"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"Chaitanya Kulkarni" <chaitanyak@nvidia.com>,
	"Luis Chamberlain" <mcgrof@kernel.org>,
	"Yexuan Yang" <1182282462@bupt.edu.cn>,
	"Sergio González Collado" <sergio.collado@gmail.com>,
	"Joel Granados" <j.granados@samsung.com>,
	"Pankaj Raghav (Samsung)" <kernel@pankajraghav.com>,
	"Daniel Gomez" <da.gomez@samsung.com>,
	"Niklas Cassel" <Niklas.Cassel@wdc.com>,
	"Philipp Stanner" <pstanner@redhat.com>,
	"Conor Dooley" <conor@kernel.org>,
	"Johannes Thumshirn" <Johannes.Thumshirn@wdc.com>,
	"Matias Bjørling" <m@bjorling.me>,
	"open list" <linux-kernel@vger.kernel.org>,
	"rust-for-linux@vger.kernel.org" <rust-for-linux@vger.kernel.org>,
	"lsf-pc@lists.linux-foundation.org"
	<lsf-pc@lists.linux-foundation.org>,
	"gost.dev@samsung.com" <gost.dev@samsung.com>
Subject: Re: [PATCH v3 1/3] rust: block: introduce `kernel::block::mq` module
Date: Sat, 01 Jun 2024 13:11:05 +0200	[thread overview]
Message-ID: <87ikysor3q.fsf@metaspace.dk> (raw)
In-Reply-To: <47a8ce04-3901-49ae-abac-a7d85901f980@proton.me> (Benno Lossin's message of "Sat, 01 Jun 2024 09:43:47 +0000")

Benno Lossin <benno.lossin@proton.me> writes:

> On 01.06.24 10:18, Andreas Hindborg wrote:
>> From: Andreas Hindborg <a.hindborg@samsung.com>
>
> [...]
>
>> +impl<T: Operations> GenDisk<T, Initialized> {
>> +    /// Try to create a new `GenDisk`.
>> +    pub fn try_new(tagset: Arc<TagSet<T>>) -> Result<Self> {
>> +        let lock_class_key = crate::sync::LockClassKey::new();
>> +
>> +        // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set
>> +        let gendisk = from_err_ptr(unsafe {
>> +            bindings::__blk_mq_alloc_disk(
>> +                tagset.raw_tag_set(),
>> +                core::ptr::null_mut(), // TODO: We can pass queue limits right here
>> +                core::ptr::null_mut(),
>> +                lock_class_key.as_ptr(),
>> +            )
>> +        })?;
>> +
>> +        const TABLE: bindings::block_device_operations = bindings::block_device_operations {
>> +            submit_bio: None,
>> +            open: None,
>> +            release: None,
>> +            ioctl: None,
>> +            compat_ioctl: None,
>> +            check_events: None,
>> +            unlock_native_capacity: None,
>> +            getgeo: None,
>> +            set_read_only: None,
>> +            swap_slot_free_notify: None,
>> +            report_zones: None,
>> +            devnode: None,
>> +            alternative_gpt_sector: None,
>> +            get_unique_id: None,
>> +            // TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to
>> +            // be merged (unstable in rustc 1.78 which is staged for linux 6.10)
>> +            // https://github.com/rust-lang/rust/issues/119618
>
> AFAIK the 1.78 upgrade already is in rust-next (and also should appear
> in v6.10-rc2, right?) do you have this on your radar?

I am tracking this and I plan to add support in a later patch.

>
>> +            owner: core::ptr::null_mut(),
>> +            pr_ops: core::ptr::null_mut(),
>> +            free_disk: None,
>> +            poll_bio: None,
>> +        };
>> +
>> +        // SAFETY: gendisk is a valid pointer as we initialized it above
>> +        unsafe { (*gendisk).fops = &TABLE };
>> +
>> +        // INVARIANT: `gendisk` was initialized above.
>> +        // INVARIANT: `gendisk.queue.queue_data` is set to `data` in the call to
>> +        // `__blk_mq_alloc_disk` above.
>> +        Ok(GenDisk {
>> +            tagset,
>> +            gendisk,
>> +            _phantom: PhantomData,
>> +        })
>> +    }
>
> [...]
>
>> +    /// This function is called by the C kernel. A pointer to this function is
>> +    /// installed in the `blk_mq_ops` vtable for the driver.
>> +    ///
>> +    /// # Safety
>> +    ///
>> +    /// This function may only be called by blk-mq C infrastructure.
>> +    unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) {
>> +        T::commit_rqs()
>> +    }
>> +
>> +    /// This function is called by the C kernel. It is not currently
>> +    /// implemented, and there is no way to exercise this code path.
>
> Is it also possible to completely remove it? ie use `None` in the
> VTABLE, or will the C side error?

No, this pointer is not allowed to be null. It must be a callable
function, hence the stub. It will be populated soon enough when I send
patches for the remote completion logic.

>
>> +    ///
>> +    /// # Safety
>> +    ///
>> +    /// This function may only be called by blk-mq C infrastructure.
>> +    unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {}
>
> [...]
>
>> +impl<'a> RawWriter<'a> {
>> +    /// Create a new `RawWriter` instance.
>> +    fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> {
>> +        *(buffer.last_mut().ok_or(EINVAL)?) = 0;
>> +
>> +        // INVARIANT: We null terminated the buffer above.
>> +        Ok(Self { buffer, pos: 0 })
>> +    }
>> +
>> +    pub(crate) fn from_array<const N: usize>(
>> +        a: &'a mut [core::ffi::c_char; N],
>> +    ) -> Result<RawWriter<'a>> {
>
> You could change the return type to be `RawWriter<'a>` and check using
> `build_assert!` that `N > 0`. Then you can also call `unwrap_unchecked`
> on the result that you get below.
>
> I don't know if we want that, but it is a possibility.

I guess we could potentially make the type generic over a const buffer
size. But let's put a pin in that for now. I'll look into that down the road.

>
>> +        Self::new(
>> +            // SAFETY: the buffer of `a` is valid for read and write as `u8` for
>> +            // at least `N` bytes.
>> +            unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) },
>> +        )
>> +    }
>> +}
>
> [...]
>
>> +/// Store the result of `op(target.load())` in target, returning new value of
>> +/// taret.
>> +fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 {
>> +    let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x)));
>> +
>> +    // SAFETY: Because the operation passed to `fetch_update` above always
>> +    // return `Some`, `old` will always be `Ok`.
>> +    let old = unsafe { old.unwrap_unchecked() };
>> +
>> +    op(old)
>> +}
>> +
>> +/// Store the result of `op(target.load)` in `target` if `target.load() !=
>> +/// pred`, returning previous value of target
>
> The function returns a bool, not a u64 (value). From the body I read
> that you return whether the value was updated.

Thanks 👍

>
>> +fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool {
>> +    let x = target.load(Ordering::Relaxed);
>> +    loop {
>> +        if x == pred {
>> +            break;
>> +        }
>> +        if target
>> +            .compare_exchange_weak(x, op(x), Ordering::Relaxed, Ordering::Relaxed)
>> +            .is_ok()
>> +        {
>> +            break;
>> +        }
>
> If this fails, you are not re-reading the value of `target`, so if
> someone else just set it to `pred`, you will still continue to try to
> set it from `x` to `op(x)`, but it might never have the value `x` again.
> This would lead to a potentially infinite loop, right?

Yea, that is not good. I should have moved the assignment of `x` into the loop

>
>> +    }
>
> Do you think you can also implement this using `fetch_update`? I guess
> this would do what you want, right?:
>
>     target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| {
>         if x == pred {
>             None
>         } else {
>             Some(op(x))
>         }
>     }).is_ok()

Makes sense, I will steal that.

>
>> +
>> +    x == pred
>> +}
>
> [...]
>
>> +impl<T: Operations> TagSet<T> {
>> +    /// Try to create a new tag set
>> +    pub fn try_new(
>> +        nr_hw_queues: u32,
>> +        num_tags: u32,
>> +        num_maps: u32,
>> +    ) -> impl PinInit<Self, error::Error> {
>> +        try_pin_init!( TagSet {
>> +            // INVARIANT: We initialize `inner` here and it is valid after the
>> +            // initializer has run.
>> +            inner <- unsafe {kernel::init::pin_init_from_closure(move |place: *mut Opaque<bindings::blk_mq_tag_set>| -> Result<()> {
>> +                let place = place.cast::<bindings::blk_mq_tag_set>();
>> +
>> +                // SAFETY: pin_init_from_closure promises that `place` is writable, and
>> +                // zeroes is a valid bit pattern for this structure.
>> +                core::ptr::write_bytes(place, 0, 1);
>> +
>> +                /// For a raw pointer to a struct, write a struct field without
>> +                /// creating a reference to the field
>> +                macro_rules! write_ptr_field {
>> +                    ($target:ident, $field:ident, $value:expr) => {
>> +                        ::core::ptr::write(::core::ptr::addr_of_mut!((*$target).$field), $value)
>> +                    };
>> +                }
>> +
>> +                // SAFETY: pin_init_from_closure promises that `place` is writable
>> +                    write_ptr_field!(place, ops, OperationsVTable::<T>::build());
>> +                    write_ptr_field!(place, nr_hw_queues , nr_hw_queues);
>> +                    write_ptr_field!(place, timeout , 0); // 0 means default which is 30 * HZ in C
>> +                    write_ptr_field!(place, numa_node , bindings::NUMA_NO_NODE);
>> +                    write_ptr_field!(place, queue_depth , num_tags);
>> +                    write_ptr_field!(place, cmd_size , core::mem::size_of::<RequestDataWrapper>().try_into()?);
>> +                    write_ptr_field!(place, flags , bindings::BLK_MQ_F_SHOULD_MERGE);
>> +                    write_ptr_field!(place, driver_data , core::ptr::null_mut::<core::ffi::c_void>());
>> +                    write_ptr_field!(place, nr_maps , num_maps);
>
> Did something not work with my suggestion?

I did not want to change it if we are rewriting it to `Opaque::init`
in a cycle or two, which I think is a better solution.


Best regards,
Andreas

  parent reply	other threads:[~2024-06-01 11:11 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-01  8:18 [PATCH v3 0/3] Rust block device driver API and null block driver Andreas Hindborg
2024-06-01  8:18 ` [PATCH v3 1/3] rust: block: introduce `kernel::block::mq` module Andreas Hindborg
2024-06-01  9:43   ` Benno Lossin
2024-06-01  9:59     ` Miguel Ojeda
2024-06-01 11:11     ` Andreas Hindborg [this message]
2024-06-01 12:05       ` Benno Lossin
2024-06-01 12:51         ` Andreas Hindborg
2024-06-01  8:18 ` [PATCH v3 2/3] rust: block: add rnull, Rust null_blk implementation Andreas Hindborg
2024-06-01  9:15   ` Benno Lossin
2024-06-01 11:15     ` Andreas Hindborg
2024-06-01  8:18 ` [PATCH v3 3/3] MAINTAINERS: add entry for Rust block device driver API Andreas Hindborg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87ikysor3q.fsf@metaspace.dk \
    --to=nmi@metaspace.dk \
    --cc=1182282462@bupt.edu.cn \
    --cc=Johannes.Thumshirn@wdc.com \
    --cc=Niklas.Cassel@wdc.com \
    --cc=a.hindborg@samsung.com \
    --cc=alex.gaynor@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=axboe@kernel.dk \
    --cc=benno.lossin@proton.me \
    --cc=bjorn3_gh@protonmail.com \
    --cc=boqun.feng@gmail.com \
    --cc=bvanassche@acm.org \
    --cc=chaitanyak@nvidia.com \
    --cc=conor@kernel.org \
    --cc=da.gomez@samsung.com \
    --cc=dlemoal@kernel.org \
    --cc=gary@garyguo.net \
    --cc=gost.dev@samsung.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=j.granados@samsung.com \
    --cc=kbusch@kernel.org \
    --cc=kernel@pankajraghav.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lsf-pc@lists.linux-foundation.org \
    --cc=m@bjorling.me \
    --cc=mcgrof@kernel.org \
    --cc=ming.lei@redhat.com \
    --cc=ojeda@kernel.org \
    --cc=pstanner@redhat.com \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=sergio.collado@gmail.com \
    --cc=wedsonaf@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.