public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Arnd Bergmann <arnd-r2nGTMty4D4@public.gmane.org>
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Cc: cborntra-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org,
	schwidefsky-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org
Subject: Re: [PATCH/RFC 2/2] s390 virtualization interface.
Date: Fri, 27 Apr 2007 18:53:40 +0200	[thread overview]
Message-ID: <200704271853.41523.arnd@arndb.de> (raw)
In-Reply-To: <1177681235.5770.22.camel-WIxn4w2hgUz3YA32ykw5MLlKpX0K8NHHQQ4Iyu8u01E@public.gmane.org>

On Friday 27 April 2007, Carsten Otte wrote:
> Add interface which allows a process to start a virtual machine.
 
Looks pretty good to me already.

It seems a lot closer to lguest than to kvm at the moment concerning
the kernel interfaces (guest real address == host process, state
is attached to the thread_info, ...).

Now for the nitpicking:

> +static DEFINE_MUTEX(s390host_init_mutex);
> +
> +static void s390host_get_data(struct s390host_data *data)
> +{
> +	atomic_inc(&data->count);
> +}
> +
> +void s390host_put_data(struct s390host_data *data)
> +{
> +	int cpu;
> +
> +	if (atomic_dec_return(&data->count))
> +		return;
> +
> +	for (cpu = 0; cpu < S390HOST_MAX_CPUS; cpu++)
> +		if (data->sie_io[cpu])
> +			free_page((unsigned long)data->sie_io[cpu]);
> +
> +	if (data->sca_block)
> +		free_page((unsigned long)data->sca_block);
> +
> +	kfree(data);
> +}

These should probably use a struct kref instead of an atomic_t
to count the references.

> +static unsigned long
> +s390host_create_io_area(unsigned long addr, unsigned long flags,
> +			unsigned long io_addr, struct s390host_data *data)
> +{
> +	struct mm_struct *mm = current->mm;
> +	struct vm_area_struct *vma;
> +	unsigned long ret;
> +
> +	flags &= MAP_FIXED;
> +	addr = get_unmapped_area(NULL, addr, 2 * PAGE_SIZE, 0, flags);
> +
> +	if (addr & ~PAGE_MASK)
> +		return addr;
> +
> +	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
> +
> +	if (!vma)
> +		return -ENOMEM;
> +
> +	vma->vm_mm = mm;
> +	vma->vm_start = addr;
> +	vma->vm_end = addr + 2 * PAGE_SIZE;
> +	vma->vm_flags =	VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
> +	vma->vm_flags |= VM_SHARED | VM_MAYSHARE | VM_DONTCOPY;
> +
> +#if 1	/* FIXME: write access until sys_s390host_sie interface is final */
> +	vma->vm_flags |= VM_WRITE | VM_MAYWRITE;
> +#endif
> +
> +	vma->vm_page_prot = protection_map[vma->vm_flags & 0xf];
> +	vma->vm_private_data = data;
> +	vma->vm_ops = &s390host_vmops;
> +
> +	down_write(&mm->mmap_sem);
> +	ret = insert_vm_struct(mm, vma);
> +	if (ret) {
> +		kmem_cache_free(vm_area_cachep, vma);
> +		goto out;
> +	}
> +	s390host_get_data(data);
> +	mm->total_vm += 2;
> +	vm_insert_page(vma, addr, virt_to_page(io_addr));
> +
> +	ret = split_vma(mm, vma, addr + PAGE_SIZE, 0);
> +	if (ret)
> +		goto out;
> +	s390host_get_data(data);
> +
> +	vma = find_vma(mm, addr + PAGE_SIZE);
> +	vma->vm_flags |= VM_WRITE | VM_MAYWRITE;
> +	vma->vm_page_prot = protection_map[vma->vm_flags & 0xf];
> +	vm_insert_page(vma, addr + PAGE_SIZE,
> +		       virt_to_page(io_addr + PAGE_SIZE));
> +	ret = addr;
> +out:
> +	up_write(&mm->mmap_sem);
> +	return ret;
> +}

This open-coded mmap looks a bit scary. I can't see anything wrong in it,
but it may conflict with a user application that tries to do other strange
things with memory maps.

Maybe you can either make this a real call to mmap(), or have it automatically
mapped using the vdso mechanism.

> +long sys_s390host_add_cpu(unsigned long addr, unsigned long flags,
> +			  struct sie_block __user *sie_template)
> +{
> +	struct sie_block *sie_block;
> +	struct sie_io *sie_io;
> +	struct sca_block *sca_block;
> +	struct s390host_data *data = NULL;
> +	unsigned long ret;
> +	__u16 cpu;

__u16 is a type for user space interfaces. In the kernel, use u16.

> +	if (current_thread_info()->sie_cpu != -1)
> +		return -EINVAL;

-EBUSY?

> +	if (copy_from_user(&cpu, &sie_template->icpua, sizeof(u16)))
> +		return -EFAULT;

get_user()?

> +	if (cpu >= S390HOST_MAX_CPUS)
> +		return -EINVAL;
> +
> +	mutex_lock(&s390host_init_mutex);
> +
> +	data = get_s390host_context();
> +	if (!data) {
> +		ret = -ENOMEM;
> +		goto out_err;
> +	}
> +
> +	sca_block = data->sca_block;
> +	if (sca_block->mcn & (1UL << (S390HOST_MAX_CPUS - 1 - cpu))) {

__test_bit()?

> +		ret = -EINVAL;

-EBUSY?

> +int sys_s390host_remove_cpu(void)
> +{
> +	struct sca_block *sca_block;
> +	int cpu;
> +
> +	cpu = current_thread_info()->sie_cpu;
> +	if (cpu == -1)
> +		return -EINVAL;
> +
> +	mutex_lock(&s390host_init_mutex);
> +	sca_block = current_thread_info()->s390host_data->sca_block;
> +	sca_block->mcn &= ~(1UL << (S390HOST_MAX_CPUS - 1 - cpu));
> +	current_thread_info()->sie_cpu = -1;
> +	mutex_unlock(&s390host_init_mutex);
> +	return 0;
> +}

Shouldn't this free the resources that were allocated by add_cpu?
If not, this syscall seems pretty useless and I'd simply not
do it at all -- just wait until exit() to clean up.

> +	sie_io = current_thread_info()->s390host_data->sie_io[cpu];
> +
> +	if (action)
> +		ret = s390host_do_action(action, sie_io);
> +	if (ret)
> +		goto out_err;
> +	sie_kernel = &sie_io->sie_kernel;
> +	sie_user = &sie_io->sie_user;
> +
> +	save_fp_regs(&sie_kernel->host_fpregs);
> +	save_access_regs(sie_kernel->host_acrs);
> +	sie_user->guest_fpregs.fpc &= FPC_VALID_MASK;
> +	restore_fp_regs(&sie_user->guest_fpregs);
> +	restore_access_regs(sie_user->guest_acrs);
> +	memcpy(&sie_kernel->sie_block.gg14, &sie_user->gprs[14], 16);
> +again:
> +	if (need_resched())
> +		schedule();
> +
> +	sie_kernel->sie_block.icptcode = 0;
> +	ret = sie64a(sie_kernel);
> +	if (ret)
> +		goto out;
> +
> +	if (signal_pending(current)) {
> +		ret = -EINTR;
> +		goto out;
> +	}

Can't you handle interrupted system calls? I think it would be nicer
to return -ERESTARTSYS here so you don't have to go back to your
user space after a signal.

> +struct sie_kernel {
> +	struct sie_block	sie_block;
> +	s390_fp_regs	host_fpregs;
> +	int		host_acrs[NUM_ACRS];
> +} __attribute__((packed,aligned(4096)));

Why packed?

> +#define SIE_UPDATE_PSW		(1UL << 0)
> +#define SIE_FLUSH_TLB		(1UL << 1)
> +#define SIE_ISKE		(1UL << 2)
> +#define SIE_SSKE		(1UL << 3)
> +#define SIE_BLOCK_UPDATE	(1UL << 4)
> +#define SIE_VSMXM_LOCAL_UPDATE	(1UL << 5)
> +#define SIE_VSMXM_DIST_UPDATE   (1UL << 6)

I find it much more readable to define these like

#define SIE_UPDATE_PSW		1
#define SIE_FLUSH_TLB		2
#define SIE_ISKE		4

especially when trying to interpret values in memory.

> +struct sie_user {
> +	struct sie_block	sie_block;
> +	psw_t			psw;
> +	unsigned long		gprs[NUM_GPRS];
> +	s390_fp_regs		guest_fpregs;
> +	int			guest_acrs[NUM_ACRS];
> +	struct sie_skey_parm	iske_parm;
> +	struct sie_skey_parm	sske_parm;
> +	int			vsmxm_or_local;
> +	int			vsmxm_and_local;
> +	int			vsmxm_or;
> +	int			vsmxm_and;
> +	int			vsmxm_cpuid;
> +} __attribute__((packed,aligned(4096)));

Is this data structure extensible? If it is, you probably need
some sort of versioning information to make sure that user space
doesn't rely on fields that the kernel doesn't know about.

> +struct sca_entry {
> +	atomic_t scn;
> +	__u64	reserved;
> +	__u64	sda;
> +	__u64	reserved2[2];
> +}__attribute__((packed));

Is this a hardware data structure? If not, you shouldn't pack it
because the first word is only 32 bits and consequently all following
ones are unaligned.

> +/* function definitions */
> +extern int sie64a(struct sie_kernel *);
> +extern void s390host_put_data(struct s390host_data *);

These should be inside of '#ifdef __KERNEL__', like all declarations that user
space doesn't need to see.

> @@ -52,6 +53,8 @@ struct thread_info {
>  	unsigned int		cpu;		/* current CPU */
>  	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
>  	struct restart_block	restart_block;
> +	struct s390host_data	*s390host_data;	/* s390host data */
> +	int			sie_cpu;	/* sie cpu number */
>  };

What happens to these variables on execve?

	Arnd <><

-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

  parent reply	other threads:[~2007-04-27 16:53 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-27 13:40 [PATCH/RFC 2/2] s390 virtualization interface Carsten Otte
     [not found] ` <1177681235.5770.22.camel-WIxn4w2hgUz3YA32ykw5MLlKpX0K8NHHQQ4Iyu8u01E@public.gmane.org>
2007-04-27 15:23   ` Anthony Liguori
     [not found]     ` <4632155C.8030503-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2007-04-27 15:45       ` Carsten Otte
2007-04-27 16:53   ` Arnd Bergmann [this message]
     [not found]     ` <200704271853.41523.arnd-r2nGTMty4D4@public.gmane.org>
2007-04-29  8:00       ` Heiko Carstens
     [not found]         ` <20070429080039.GA8332-5VkHqLvV2o3MbYB6QlFGEg@public.gmane.org>
2007-05-01 21:12           ` Arnd Bergmann
     [not found]             ` <200705012312.11692.arnd-r2nGTMty4D4@public.gmane.org>
2007-05-02  4:46               ` Avi Kivity
2007-05-02  8:25               ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200704271853.41523.arnd@arndb.de \
    --to=arnd-r2ngtmty4d4@public.gmane.org \
    --cc=cborntra-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org \
    --cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
    --cc=schwidefsky-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox