Re: [patch] mspec driver for 2.6.12-rc2-mm3

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Christoph Hellwig <hch@infradead.org>
To: Jes Sorensen <jes@wildopensource.com>
Cc: Andrew Morton <akpm@osdl.org>, linux-kernel@vger.kernel.org
Subject: Re: [patch] mspec driver for 2.6.12-rc2-mm3
Date: Wed, 13 Apr 2005 21:43:35 +0100	[thread overview]
Message-ID: <20050413204335.GA17012@infradead.org> (raw)
In-Reply-To: <yq07jj8123j.fsf@jaguar.mkp.net>

On Tue, Apr 12, 2005 at 10:50:08AM -0400, Jes Sorensen wrote:
> +config MSPEC
> +	tristate "Special Memory support"
> +	select GENERIC_ALLOCATOR

should depend on IA64_GENERIC || IA64_SGI_SN2 because it's using sn2
functions like bte_copy

> +#define BTE_ZERO_BLOCK(_maddr, _len) \
> +	bte_copy(0, _maddr - __IA64_UNCACHED_OFFSET, _len, BTE_WACQUIRE | BTE_ZERO_FILL, NULL)

should become an line function.

> +static int fetchop_mmap(struct file *file, struct vm_area_struct *vma);
> +static int cached_mmap(struct file *file, struct vm_area_struct *vma);
> +static int uncached_mmap(struct file *file, struct vm_area_struct *vma);
> +static void mspec_open(struct vm_area_struct *vma);
> +static void mspec_close(struct vm_area_struct *vma);
> +static struct page * mspec_nopage(struct vm_area_struct *vma,
> +					unsigned long address, int *unused);

please try to reorder the code to avoid forward-prototypes where easily
possible.

> +/*
> + * There is one of these structs per node. It is used to manage the mspec
> + * space that is available on the node. Current assumption is that there is
> + * only 1 mspec block of memory per node.
> + */
> +struct node_mspecs {
> +	long		maddr;		/* phys addr of start of mspecs. */
> +	int		count;		/* Total number of mspec pages. */
> +	atomic_t	free;		/* Number of pages currently free. */
> +	unsigned long	bits[1];	/* Bitmap for managing pages. */

Please use the bits[0] gcc extensions so all the calculations for the
variable sized array easily make sense.

> +/*
> + * One of these structures is allocated when an mspec region is mmaped. The
> + * structure is pointed to by the vma->vm_private_data field in the vma struct. 
> + * This structure is used to record the addresses of the mspec pages.
> + */
> +struct vma_data {
> +	atomic_t	refcnt;		/* Number of vmas sharing the data. */
> +	spinlock_t	lock;		/* Serialize access to the vma. */
> +	int		count;		/* Number of pages allocated. */
> +	int		type;		/* Type of pages allocated. */
> +	unsigned long	maddr[1];	/* Array of MSPEC addresses. */

dito

> +};
> +
> +
> +/*
> + * Memory Special statistics.
> + */
> +struct mspec_stats {
> +	atomic_t	map_count;	/* Number of active mmap's */
> +	atomic_t	pages_in_use;	/* Number of mspec pages in use */
> +	unsigned long	pages_total;	/* Total number of mspec pages */
> +};
> +
> +static struct mspec_stats	mspec_stats;
> +static struct node_mspecs	*node_mspecs[MAX_NUMNODES];
> +
> +#define MAX_UNCACHED_GRANULES	5
> +static int allocated_granules;
> +
> +struct gen_pool *mspec_pool[MAX_NUMNODES];

> +static unsigned long
> +mspec_get_new_chunk(struct gen_pool *poolp)
> +{
> +	struct page *page;
> +	void *tmp;
> +	int status, node, i;
> +	unsigned long addr;
> +
> +	if (allocated_granules >= MAX_UNCACHED_GRANULES)
> +		return 0;
> +
> +	node = (int)poolp->private;

maybe the private data in the genpool should be an union of
void * and unsigned long so we can avoid all those casrs?

> +	page = alloc_pages_node(node, GFP_KERNEL,
> +				IA64_GRANULE_SHIFT-PAGE_SHIFT);

> +	tmp = page_address(page);
> +	memset(tmp, 0, IA64_GRANULE_SIZE);

shouldn't you just use __GFP_ZERO?

> +#if DEBUG
> +	printk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n",
> +	       status, get_cpu());
> +#endif

same dprintk comment as for genalloc.

> +	vdata->lock = SPIN_LOCK_UNLOCKED;

you're supposed to use spin_lock_init() these days.

> +/*
> + * mspec_get_one_pte
> + *
> + * Return the pte for a given mm and address.
> + */
> +static __inline__ int
> +mspec_get_one_pte(struct mm_struct *mm, u64 address, pte_t **pte)
> +{
> +	pgd_t *pgd;
> +	pmd_t *pmd;
> +	pud_t *pud;
> +
> +	pgd = pgd_offset(mm, address);
> +	if (pgd_present(*pgd)) {
> +		pud = pud_offset(pgd, address);
> +		if (pud_present(*pud)) {
> +			pmd = pmd_offset(pud, address);
> +			if (pmd_present(*pmd)) {
> +				*pte = pte_offset_map(pmd, address);
> +				if (pte_present(**pte)) {
> +					return 0;
> +				}
> +			}
> +		}
> +	}
> +
> +	return -1;
> +}

> +	spin_lock(&vdata->lock);
> +
> +	index = (address - vma->vm_start) >> PAGE_SHIFT;
> +	if (vdata->maddr[index] == 0) {
> +		vdata->count++;
> +		maddr = mspec_alloc_page(numa_node_id(), vdata->type);

this looks like a page allocation under spinlock.

> +	/*
> +	 * The kernel requires a page structure to be returned upon
> +	 * success, but there are no page structures for low granule pages.
> +	 * remap_page_range() creates the pte for us and we return a
> +	 * bogus page back to the kernel fault handler to keep it happy
> +	 * (the page is freed immediately there).
> +	 */

Please don't use the ->nopage approach thenm but do remap_pfn_range
beforehand.  ->nopage is very nice if the region is actually backed by
normal RAM, but what you're doing doesn't make much sense.

> +/*
> + * Walk the EFI memory map to pull out leftover pages in the lower
> + * memory regions which do not end up in the regular memory map and
> + * stick them into the uncached allocator
> + */
> +static void __init
> +mspec_walk_efi_memmap_uc (void)

I'm pretty sure this was NACKed on the ia64 list, and SGI was told to do
a more generic efi memmap walk.

> +	/*
> +	 * The fetchop device only works on SN2 hardware, uncached and cached
> +	 * memory drivers should both be valid on all ia64 hardware
> +	 */

In which case my above comment about the depency doesn't make sense, but
you'll have to split the driver into separate files or add ifdefs.  Please
test it on some non-sgi hardware with a non-generic kernel build.

> +	printk(KERN_INFO "%s: v%s\n", FETCHOP_DRIVER_ID_STR, REVISION);
> +	printk(KERN_INFO "%s: v%s\n", CACHED_DRIVER_ID_STR, REVISION);
> +	printk(KERN_INFO "%s: v%s\n", UNCACHED_DRIVER_ID_STR, REVISION);

Please keep the noise level down and remove these.

> +unsigned long
> +mspec_kalloc_page(int nid)
> +{
> +	return TO_AMO(mspec_alloc_page(nid, MSPEC_FETCHOP));
> +}
> +
> +
> +void
> +mspec_kfree_page(unsigned long maddr)
> +{
> +	mspec_free_page(TO_PHYS(maddr) + __IA64_UNCACHED_OFFSET);
> +}
> +EXPORT_SYMBOL(mspec_kalloc_page);
> +EXPORT_SYMBOL(mspec_kfree_page);

What is this for?  these look like really odd APIs.

next prev parent reply	other threads:[~2005-04-13 20:43 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-04-12  9:56 [patch] mspec driver for 2.6.12-rc2-mm3 Jes Sorensen
2005-04-12 10:21 ` Andrew Morton
2005-04-12 10:22 ` Andrew Morton
2005-04-12 10:27 ` Andrew Morton
2005-04-12 14:50   ` Jes Sorensen
2005-04-13 20:43     ` Christoph Hellwig [this message]
2005-04-22 11:21       ` Jes Sorensen
2005-04-24 10:16         ` Christoph Hellwig
2005-04-25 10:13           ` Jes Sorensen
2005-04-25 14:32             ` Christoph Hellwig
2005-04-25 14:41             ` efi_memmap_walk_uc, was " Christoph Hellwig
2005-04-25 14:47             ` returning non-ram via ->nopage, " Christoph Hellwig
2005-04-26 22:14               ` Jes Sorensen
2005-04-27 15:53                 ` Jeff Garzik
2005-04-27 15:55                   ` Christoph Hellwig
2005-04-27 18:03                     ` Jes Sorensen
2005-04-27 18:55                       ` Russell King
2005-05-03 20:40                 ` William Lee Irwin III

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050413204335.GA17012@infradead.org \
    --to=hch@infradead.org \
    --cc=akpm@osdl.org \
    --cc=jes@wildopensource.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox