/*   W     W   AAA   RRRR   N   N  III  N   N   GGG   !!!
**   W     W  A   A  R   R  NN  N   I   NN  N  G   G  !!!
**   W  W  W  AAAAA  RRRR   N N N   I   N N N  G       !
**    W W W   A   A  R   R  N  NN   I   N  NN  G  GG
**     W W    A   A  R   R  N   N  III  N   N   GGG   !!!
**
** WARNING: This file is program generated by codegenerator.py.
**
** DO NOT EDIT THIS FILE! Any changes made to this file will be lost!
*/

#include <Python.h>
#include <stdio.h>
#include "libnumarray.h"

#define STDC_LT(a,b) ((a) < (b))
#define STDC_LE(a,b) ((a) <= (b))
#define STDC_EQ(a,b) ((a) == (b))

#define SWAP(a,b) { SWAP_temp = a; a = b; b = SWAP_temp;}

#define swap(i, j)   { temp=v[i]; v[i]=v[j]; v[j]=temp; }
#define wswap(i, j)   { temp=v[i];  v[i]=v[j]; v[j]=temp; wtemp=w[i];  w[i]=w[j]; w[j]=wtemp; }

/****************** Bool *******************/


void  quicksort0Bool(Bool *pl, Bool *pr)
{
	Bool vp, SWAP_temp;
	Bool *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortBool(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Bool *pl = (Bool *) buffers[0]; 
	Bool *pr = pl + niter - 1;

	quicksort0Bool(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortBool, sizeof(Bool));


void  aquicksort0Bool(long *pl, long *pr, Bool *v)
{
	Bool vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortBool(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Bool *v = (Bool *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0Bool(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortBool, sizeof(Bool), sizeof(long));


void mergesort0Bool(Bool *pl, Bool *pr, Bool *pw)
{
	Bool vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0Bool(pl,pm-1,pw);
		mergesort0Bool(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortBool(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Bool *pl = (Bool *) buffers[0];
	Bool *pr = pl + niter - 1;
	Bool *pw = (Bool *) malloc(((1 + niter/2))*sizeof(Bool));

	if (pw != NULL) {
		mergesort0Bool(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortBool, sizeof(Bool));


void amergesort0Bool(long *pl, long *pr, Bool *v, long *pw)
{
        Bool vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0Bool(pl,pm-1,v,pw);
		amergesort0Bool(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortBool(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Bool *v  = (Bool *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0Bool(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortBool, sizeof(Bool), sizeof(long));


void heapsort0Bool(Bool *a, long n)
{
	Bool tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortBool(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Bool *pl = (Bool *) buffers[0];
        
	heapsort0Bool(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortBool, sizeof(Bool));


void aheapsort0Bool(long *a, long n, Bool *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortBool(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Bool *v = (Bool *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0Bool(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortBool, sizeof(Bool), sizeof(long));


void sort0Bool(Bool *v, long left, long right)
{
  Bool temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0Bool(v, left, lastl);
  sort0Bool(v, lastr, right);
}

static int sortBool(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0Bool( (Bool *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortBool, sizeof(Bool));

void asort0Bool(Bool *v, long *w, long left, long right)
{
  Bool temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0Bool(v, w, left, lastl);
  asort0Bool(v, w, lastr, right);
}

static int asortBool(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0Bool( (Bool *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortBool, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(Bool), sizeof(long), 0, 0, 0, 0);

static long searchBool(long na, Bool *a, Bool v)
{
  long i = 0;
  Bool *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedBool(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  Bool *bins;
  Bool *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedBool", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedBool", nbins,
      buffers[1], bsizes[1], sizeof(Bool)))
      return -1;
  bins    = (Bool *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedBool", niter,
      buffers[2], bsizes[2], sizeof(Bool)))
      return -1;
  values  = (Bool *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedBool", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedBool", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchBool(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedBool, CFUNC_UFUNC);

static int fillarrayBool (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    Bool start, delta;
    Bool *tin;
    Bool *tparams;
    Int8 itemsizes[2] = { sizeof(Bool), sizeof(Bool) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayBool", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayBool", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (Bool *) buffers[0];
    tparams = (Bool *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayBool, CFUNC_UFUNC);

static int nonzeroCoordsBool(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	Bool   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsBool", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsBool", niter,
		      buffers[0], bsizes[0], sizeof(Bool)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsBool", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsBool", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (Bool *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsBool: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsBool: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsBool, CFUNC_UFUNC);


/****************** Int8 *******************/


void  quicksort0Int8(Int8 *pl, Int8 *pr)
{
	Int8 vp, SWAP_temp;
	Int8 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int8 *pl = (Int8 *) buffers[0]; 
	Int8 *pr = pl + niter - 1;

	quicksort0Int8(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortInt8, sizeof(Int8));


void  aquicksort0Int8(long *pl, long *pr, Int8 *v)
{
	Int8 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int8 *v = (Int8 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0Int8(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortInt8, sizeof(Int8), sizeof(long));


void mergesort0Int8(Int8 *pl, Int8 *pr, Int8 *pw)
{
	Int8 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0Int8(pl,pm-1,pw);
		mergesort0Int8(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int8 *pl = (Int8 *) buffers[0];
	Int8 *pr = pl + niter - 1;
	Int8 *pw = (Int8 *) malloc(((1 + niter/2))*sizeof(Int8));

	if (pw != NULL) {
		mergesort0Int8(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortInt8, sizeof(Int8));


void amergesort0Int8(long *pl, long *pr, Int8 *v, long *pw)
{
        Int8 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0Int8(pl,pm-1,v,pw);
		amergesort0Int8(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int8 *v  = (Int8 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0Int8(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortInt8, sizeof(Int8), sizeof(long));


void heapsort0Int8(Int8 *a, long n)
{
	Int8 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int8 *pl = (Int8 *) buffers[0];
        
	heapsort0Int8(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortInt8, sizeof(Int8));


void aheapsort0Int8(long *a, long n, Int8 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int8 *v = (Int8 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0Int8(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortInt8, sizeof(Int8), sizeof(long));


void sort0Int8(Int8 *v, long left, long right)
{
  Int8 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0Int8(v, left, lastl);
  sort0Int8(v, lastr, right);
}

static int sortInt8(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0Int8( (Int8 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortInt8, sizeof(Int8));

void asort0Int8(Int8 *v, long *w, long left, long right)
{
  Int8 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0Int8(v, w, left, lastl);
  asort0Int8(v, w, lastr, right);
}

static int asortInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0Int8( (Int8 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortInt8, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(Int8), sizeof(long), 0, 0, 0, 0);

static long searchInt8(long na, Int8 *a, Int8 v)
{
  long i = 0;
  Int8 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedInt8(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  Int8 *bins;
  Int8 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedInt8", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedInt8", nbins,
      buffers[1], bsizes[1], sizeof(Int8)))
      return -1;
  bins    = (Int8 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedInt8", niter,
      buffers[2], bsizes[2], sizeof(Int8)))
      return -1;
  values  = (Int8 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedInt8", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedInt8", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchInt8(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedInt8, CFUNC_UFUNC);

static int fillarrayInt8 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    Int8 start, delta;
    Int8 *tin;
    Int8 *tparams;
    Int8 itemsizes[2] = { sizeof(Int8), sizeof(Int8) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayInt8", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayInt8", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (Int8 *) buffers[0];
    tparams = (Int8 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayInt8, CFUNC_UFUNC);

static int nonzeroCoordsInt8(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	Int8   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsInt8", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsInt8", niter,
		      buffers[0], bsizes[0], sizeof(Int8)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsInt8", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsInt8", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (Int8 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsInt8: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsInt8: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsInt8, CFUNC_UFUNC);


/****************** UInt8 *******************/


void  quicksort0UInt8(UInt8 *pl, UInt8 *pr)
{
	UInt8 vp, SWAP_temp;
	UInt8 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortUInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt8 *pl = (UInt8 *) buffers[0]; 
	UInt8 *pr = pl + niter - 1;

	quicksort0UInt8(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortUInt8, sizeof(UInt8));


void  aquicksort0UInt8(long *pl, long *pr, UInt8 *v)
{
	UInt8 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortUInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt8 *v = (UInt8 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0UInt8(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortUInt8, sizeof(UInt8), sizeof(long));


void mergesort0UInt8(UInt8 *pl, UInt8 *pr, UInt8 *pw)
{
	UInt8 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0UInt8(pl,pm-1,pw);
		mergesort0UInt8(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortUInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt8 *pl = (UInt8 *) buffers[0];
	UInt8 *pr = pl + niter - 1;
	UInt8 *pw = (UInt8 *) malloc(((1 + niter/2))*sizeof(UInt8));

	if (pw != NULL) {
		mergesort0UInt8(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortUInt8, sizeof(UInt8));


void amergesort0UInt8(long *pl, long *pr, UInt8 *v, long *pw)
{
        UInt8 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0UInt8(pl,pm-1,v,pw);
		amergesort0UInt8(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortUInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt8 *v  = (UInt8 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0UInt8(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortUInt8, sizeof(UInt8), sizeof(long));


void heapsort0UInt8(UInt8 *a, long n)
{
	UInt8 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortUInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt8 *pl = (UInt8 *) buffers[0];
        
	heapsort0UInt8(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortUInt8, sizeof(UInt8));


void aheapsort0UInt8(long *a, long n, UInt8 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortUInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt8 *v = (UInt8 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0UInt8(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortUInt8, sizeof(UInt8), sizeof(long));


void sort0UInt8(UInt8 *v, long left, long right)
{
  UInt8 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0UInt8(v, left, lastl);
  sort0UInt8(v, lastr, right);
}

static int sortUInt8(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0UInt8( (UInt8 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortUInt8, sizeof(UInt8));

void asort0UInt8(UInt8 *v, long *w, long left, long right)
{
  UInt8 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0UInt8(v, w, left, lastl);
  asort0UInt8(v, w, lastr, right);
}

static int asortUInt8(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0UInt8( (UInt8 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortUInt8, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(UInt8), sizeof(long), 0, 0, 0, 0);

static long searchUInt8(long na, UInt8 *a, UInt8 v)
{
  long i = 0;
  UInt8 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedUInt8(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  UInt8 *bins;
  UInt8 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedUInt8", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedUInt8", nbins,
      buffers[1], bsizes[1], sizeof(UInt8)))
      return -1;
  bins    = (UInt8 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedUInt8", niter,
      buffers[2], bsizes[2], sizeof(UInt8)))
      return -1;
  values  = (UInt8 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedUInt8", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedUInt8", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchUInt8(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedUInt8, CFUNC_UFUNC);

static int fillarrayUInt8 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    UInt8 start, delta;
    UInt8 *tin;
    UInt8 *tparams;
    Int8 itemsizes[2] = { sizeof(UInt8), sizeof(UInt8) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayUInt8", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayUInt8", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (UInt8 *) buffers[0];
    tparams = (UInt8 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayUInt8, CFUNC_UFUNC);

static int nonzeroCoordsUInt8(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	UInt8   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsUInt8", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsUInt8", niter,
		      buffers[0], bsizes[0], sizeof(UInt8)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsUInt8", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsUInt8", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (UInt8 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsUInt8: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsUInt8: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsUInt8, CFUNC_UFUNC);


/****************** Int16 *******************/


void  quicksort0Int16(Int16 *pl, Int16 *pr)
{
	Int16 vp, SWAP_temp;
	Int16 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int16 *pl = (Int16 *) buffers[0]; 
	Int16 *pr = pl + niter - 1;

	quicksort0Int16(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortInt16, sizeof(Int16));


void  aquicksort0Int16(long *pl, long *pr, Int16 *v)
{
	Int16 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int16 *v = (Int16 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0Int16(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortInt16, sizeof(Int16), sizeof(long));


void mergesort0Int16(Int16 *pl, Int16 *pr, Int16 *pw)
{
	Int16 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0Int16(pl,pm-1,pw);
		mergesort0Int16(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int16 *pl = (Int16 *) buffers[0];
	Int16 *pr = pl + niter - 1;
	Int16 *pw = (Int16 *) malloc(((1 + niter/2))*sizeof(Int16));

	if (pw != NULL) {
		mergesort0Int16(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortInt16, sizeof(Int16));


void amergesort0Int16(long *pl, long *pr, Int16 *v, long *pw)
{
        Int16 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0Int16(pl,pm-1,v,pw);
		amergesort0Int16(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int16 *v  = (Int16 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0Int16(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortInt16, sizeof(Int16), sizeof(long));


void heapsort0Int16(Int16 *a, long n)
{
	Int16 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int16 *pl = (Int16 *) buffers[0];
        
	heapsort0Int16(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortInt16, sizeof(Int16));


void aheapsort0Int16(long *a, long n, Int16 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int16 *v = (Int16 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0Int16(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortInt16, sizeof(Int16), sizeof(long));


void sort0Int16(Int16 *v, long left, long right)
{
  Int16 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0Int16(v, left, lastl);
  sort0Int16(v, lastr, right);
}

static int sortInt16(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0Int16( (Int16 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortInt16, sizeof(Int16));

void asort0Int16(Int16 *v, long *w, long left, long right)
{
  Int16 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0Int16(v, w, left, lastl);
  asort0Int16(v, w, lastr, right);
}

static int asortInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0Int16( (Int16 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortInt16, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(Int16), sizeof(long), 0, 0, 0, 0);

static long searchInt16(long na, Int16 *a, Int16 v)
{
  long i = 0;
  Int16 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedInt16(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  Int16 *bins;
  Int16 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedInt16", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedInt16", nbins,
      buffers[1], bsizes[1], sizeof(Int16)))
      return -1;
  bins    = (Int16 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedInt16", niter,
      buffers[2], bsizes[2], sizeof(Int16)))
      return -1;
  values  = (Int16 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedInt16", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedInt16", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchInt16(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedInt16, CFUNC_UFUNC);

static int fillarrayInt16 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    Int16 start, delta;
    Int16 *tin;
    Int16 *tparams;
    Int8 itemsizes[2] = { sizeof(Int16), sizeof(Int16) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayInt16", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayInt16", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (Int16 *) buffers[0];
    tparams = (Int16 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayInt16, CFUNC_UFUNC);

static int nonzeroCoordsInt16(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	Int16   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsInt16", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsInt16", niter,
		      buffers[0], bsizes[0], sizeof(Int16)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsInt16", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsInt16", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (Int16 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsInt16: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsInt16: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsInt16, CFUNC_UFUNC);


/****************** UInt16 *******************/


void  quicksort0UInt16(UInt16 *pl, UInt16 *pr)
{
	UInt16 vp, SWAP_temp;
	UInt16 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortUInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt16 *pl = (UInt16 *) buffers[0]; 
	UInt16 *pr = pl + niter - 1;

	quicksort0UInt16(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortUInt16, sizeof(UInt16));


void  aquicksort0UInt16(long *pl, long *pr, UInt16 *v)
{
	UInt16 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortUInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt16 *v = (UInt16 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0UInt16(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortUInt16, sizeof(UInt16), sizeof(long));


void mergesort0UInt16(UInt16 *pl, UInt16 *pr, UInt16 *pw)
{
	UInt16 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0UInt16(pl,pm-1,pw);
		mergesort0UInt16(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortUInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt16 *pl = (UInt16 *) buffers[0];
	UInt16 *pr = pl + niter - 1;
	UInt16 *pw = (UInt16 *) malloc(((1 + niter/2))*sizeof(UInt16));

	if (pw != NULL) {
		mergesort0UInt16(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortUInt16, sizeof(UInt16));


void amergesort0UInt16(long *pl, long *pr, UInt16 *v, long *pw)
{
        UInt16 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0UInt16(pl,pm-1,v,pw);
		amergesort0UInt16(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortUInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt16 *v  = (UInt16 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0UInt16(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortUInt16, sizeof(UInt16), sizeof(long));


void heapsort0UInt16(UInt16 *a, long n)
{
	UInt16 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortUInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt16 *pl = (UInt16 *) buffers[0];
        
	heapsort0UInt16(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortUInt16, sizeof(UInt16));


void aheapsort0UInt16(long *a, long n, UInt16 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortUInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt16 *v = (UInt16 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0UInt16(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortUInt16, sizeof(UInt16), sizeof(long));


void sort0UInt16(UInt16 *v, long left, long right)
{
  UInt16 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0UInt16(v, left, lastl);
  sort0UInt16(v, lastr, right);
}

static int sortUInt16(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0UInt16( (UInt16 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortUInt16, sizeof(UInt16));

void asort0UInt16(UInt16 *v, long *w, long left, long right)
{
  UInt16 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0UInt16(v, w, left, lastl);
  asort0UInt16(v, w, lastr, right);
}

static int asortUInt16(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0UInt16( (UInt16 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortUInt16, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(UInt16), sizeof(long), 0, 0, 0, 0);

static long searchUInt16(long na, UInt16 *a, UInt16 v)
{
  long i = 0;
  UInt16 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedUInt16(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  UInt16 *bins;
  UInt16 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedUInt16", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedUInt16", nbins,
      buffers[1], bsizes[1], sizeof(UInt16)))
      return -1;
  bins    = (UInt16 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedUInt16", niter,
      buffers[2], bsizes[2], sizeof(UInt16)))
      return -1;
  values  = (UInt16 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedUInt16", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedUInt16", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchUInt16(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedUInt16, CFUNC_UFUNC);

static int fillarrayUInt16 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    UInt16 start, delta;
    UInt16 *tin;
    UInt16 *tparams;
    Int8 itemsizes[2] = { sizeof(UInt16), sizeof(UInt16) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayUInt16", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayUInt16", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (UInt16 *) buffers[0];
    tparams = (UInt16 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayUInt16, CFUNC_UFUNC);

static int nonzeroCoordsUInt16(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	UInt16   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsUInt16", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsUInt16", niter,
		      buffers[0], bsizes[0], sizeof(UInt16)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsUInt16", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsUInt16", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (UInt16 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsUInt16: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsUInt16: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsUInt16, CFUNC_UFUNC);


/****************** Int32 *******************/


void  quicksort0Int32(Int32 *pl, Int32 *pr)
{
	Int32 vp, SWAP_temp;
	Int32 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int32 *pl = (Int32 *) buffers[0]; 
	Int32 *pr = pl + niter - 1;

	quicksort0Int32(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortInt32, sizeof(Int32));


void  aquicksort0Int32(long *pl, long *pr, Int32 *v)
{
	Int32 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int32 *v = (Int32 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0Int32(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortInt32, sizeof(Int32), sizeof(long));


void mergesort0Int32(Int32 *pl, Int32 *pr, Int32 *pw)
{
	Int32 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0Int32(pl,pm-1,pw);
		mergesort0Int32(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int32 *pl = (Int32 *) buffers[0];
	Int32 *pr = pl + niter - 1;
	Int32 *pw = (Int32 *) malloc(((1 + niter/2))*sizeof(Int32));

	if (pw != NULL) {
		mergesort0Int32(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortInt32, sizeof(Int32));


void amergesort0Int32(long *pl, long *pr, Int32 *v, long *pw)
{
        Int32 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0Int32(pl,pm-1,v,pw);
		amergesort0Int32(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int32 *v  = (Int32 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0Int32(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortInt32, sizeof(Int32), sizeof(long));


void heapsort0Int32(Int32 *a, long n)
{
	Int32 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int32 *pl = (Int32 *) buffers[0];
        
	heapsort0Int32(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortInt32, sizeof(Int32));


void aheapsort0Int32(long *a, long n, Int32 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int32 *v = (Int32 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0Int32(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortInt32, sizeof(Int32), sizeof(long));


void sort0Int32(Int32 *v, long left, long right)
{
  Int32 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0Int32(v, left, lastl);
  sort0Int32(v, lastr, right);
}

static int sortInt32(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0Int32( (Int32 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortInt32, sizeof(Int32));

void asort0Int32(Int32 *v, long *w, long left, long right)
{
  Int32 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0Int32(v, w, left, lastl);
  asort0Int32(v, w, lastr, right);
}

static int asortInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0Int32( (Int32 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortInt32, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(Int32), sizeof(long), 0, 0, 0, 0);

static long searchInt32(long na, Int32 *a, Int32 v)
{
  long i = 0;
  Int32 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedInt32(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  Int32 *bins;
  Int32 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedInt32", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedInt32", nbins,
      buffers[1], bsizes[1], sizeof(Int32)))
      return -1;
  bins    = (Int32 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedInt32", niter,
      buffers[2], bsizes[2], sizeof(Int32)))
      return -1;
  values  = (Int32 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedInt32", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedInt32", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchInt32(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedInt32, CFUNC_UFUNC);

static int fillarrayInt32 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    Int32 start, delta;
    Int32 *tin;
    Int32 *tparams;
    Int8 itemsizes[2] = { sizeof(Int32), sizeof(Int32) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayInt32", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayInt32", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (Int32 *) buffers[0];
    tparams = (Int32 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayInt32, CFUNC_UFUNC);

static int nonzeroCoordsInt32(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	Int32   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsInt32", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsInt32", niter,
		      buffers[0], bsizes[0], sizeof(Int32)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsInt32", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsInt32", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (Int32 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsInt32: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsInt32: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsInt32, CFUNC_UFUNC);


/****************** UInt32 *******************/


void  quicksort0UInt32(UInt32 *pl, UInt32 *pr)
{
	UInt32 vp, SWAP_temp;
	UInt32 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortUInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt32 *pl = (UInt32 *) buffers[0]; 
	UInt32 *pr = pl + niter - 1;

	quicksort0UInt32(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortUInt32, sizeof(UInt32));


void  aquicksort0UInt32(long *pl, long *pr, UInt32 *v)
{
	UInt32 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortUInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt32 *v = (UInt32 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0UInt32(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortUInt32, sizeof(UInt32), sizeof(long));


void mergesort0UInt32(UInt32 *pl, UInt32 *pr, UInt32 *pw)
{
	UInt32 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0UInt32(pl,pm-1,pw);
		mergesort0UInt32(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortUInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt32 *pl = (UInt32 *) buffers[0];
	UInt32 *pr = pl + niter - 1;
	UInt32 *pw = (UInt32 *) malloc(((1 + niter/2))*sizeof(UInt32));

	if (pw != NULL) {
		mergesort0UInt32(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortUInt32, sizeof(UInt32));


void amergesort0UInt32(long *pl, long *pr, UInt32 *v, long *pw)
{
        UInt32 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0UInt32(pl,pm-1,v,pw);
		amergesort0UInt32(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortUInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt32 *v  = (UInt32 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0UInt32(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortUInt32, sizeof(UInt32), sizeof(long));


void heapsort0UInt32(UInt32 *a, long n)
{
	UInt32 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortUInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt32 *pl = (UInt32 *) buffers[0];
        
	heapsort0UInt32(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortUInt32, sizeof(UInt32));


void aheapsort0UInt32(long *a, long n, UInt32 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortUInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt32 *v = (UInt32 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0UInt32(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortUInt32, sizeof(UInt32), sizeof(long));


void sort0UInt32(UInt32 *v, long left, long right)
{
  UInt32 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0UInt32(v, left, lastl);
  sort0UInt32(v, lastr, right);
}

static int sortUInt32(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0UInt32( (UInt32 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortUInt32, sizeof(UInt32));

void asort0UInt32(UInt32 *v, long *w, long left, long right)
{
  UInt32 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0UInt32(v, w, left, lastl);
  asort0UInt32(v, w, lastr, right);
}

static int asortUInt32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0UInt32( (UInt32 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortUInt32, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(UInt32), sizeof(long), 0, 0, 0, 0);

static long searchUInt32(long na, UInt32 *a, UInt32 v)
{
  long i = 0;
  UInt32 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedUInt32(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  UInt32 *bins;
  UInt32 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedUInt32", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedUInt32", nbins,
      buffers[1], bsizes[1], sizeof(UInt32)))
      return -1;
  bins    = (UInt32 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedUInt32", niter,
      buffers[2], bsizes[2], sizeof(UInt32)))
      return -1;
  values  = (UInt32 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedUInt32", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedUInt32", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchUInt32(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedUInt32, CFUNC_UFUNC);

static int fillarrayUInt32 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    UInt32 start, delta;
    UInt32 *tin;
    UInt32 *tparams;
    Int8 itemsizes[2] = { sizeof(UInt32), sizeof(UInt32) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayUInt32", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayUInt32", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (UInt32 *) buffers[0];
    tparams = (UInt32 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayUInt32, CFUNC_UFUNC);

static int nonzeroCoordsUInt32(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	UInt32   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsUInt32", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsUInt32", niter,
		      buffers[0], bsizes[0], sizeof(UInt32)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsUInt32", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsUInt32", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (UInt32 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsUInt32: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsUInt32: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsUInt32, CFUNC_UFUNC);


/****************** Float32 *******************/


void  quicksort0Float32(Float32 *pl, Float32 *pr)
{
	Float32 vp, SWAP_temp;
	Float32 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortFloat32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float32 *pl = (Float32 *) buffers[0]; 
	Float32 *pr = pl + niter - 1;

	quicksort0Float32(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortFloat32, sizeof(Float32));


void  aquicksort0Float32(long *pl, long *pr, Float32 *v)
{
	Float32 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortFloat32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float32 *v = (Float32 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0Float32(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortFloat32, sizeof(Float32), sizeof(long));


void mergesort0Float32(Float32 *pl, Float32 *pr, Float32 *pw)
{
	Float32 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0Float32(pl,pm-1,pw);
		mergesort0Float32(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortFloat32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float32 *pl = (Float32 *) buffers[0];
	Float32 *pr = pl + niter - 1;
	Float32 *pw = (Float32 *) malloc(((1 + niter/2))*sizeof(Float32));

	if (pw != NULL) {
		mergesort0Float32(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortFloat32, sizeof(Float32));


void amergesort0Float32(long *pl, long *pr, Float32 *v, long *pw)
{
        Float32 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0Float32(pl,pm-1,v,pw);
		amergesort0Float32(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortFloat32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float32 *v  = (Float32 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0Float32(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortFloat32, sizeof(Float32), sizeof(long));


void heapsort0Float32(Float32 *a, long n)
{
	Float32 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortFloat32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float32 *pl = (Float32 *) buffers[0];
        
	heapsort0Float32(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortFloat32, sizeof(Float32));


void aheapsort0Float32(long *a, long n, Float32 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortFloat32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float32 *v = (Float32 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0Float32(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortFloat32, sizeof(Float32), sizeof(long));


void sort0Float32(Float32 *v, long left, long right)
{
  Float32 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0Float32(v, left, lastl);
  sort0Float32(v, lastr, right);
}

static int sortFloat32(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0Float32( (Float32 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortFloat32, sizeof(Float32));

void asort0Float32(Float32 *v, long *w, long left, long right)
{
  Float32 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0Float32(v, w, left, lastl);
  asort0Float32(v, w, lastr, right);
}

static int asortFloat32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0Float32( (Float32 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortFloat32, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(Float32), sizeof(long), 0, 0, 0, 0);

static long searchFloat32(long na, Float32 *a, Float32 v)
{
  long i = 0;
  Float32 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedFloat32(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  Float32 *bins;
  Float32 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedFloat32", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedFloat32", nbins,
      buffers[1], bsizes[1], sizeof(Float32)))
      return -1;
  bins    = (Float32 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedFloat32", niter,
      buffers[2], bsizes[2], sizeof(Float32)))
      return -1;
  values  = (Float32 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedFloat32", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedFloat32", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchFloat32(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedFloat32, CFUNC_UFUNC);

static int fillarrayFloat32 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    Float32 start, delta;
    Float32 *tin;
    Float32 *tparams;
    Int8 itemsizes[2] = { sizeof(Float32), sizeof(Float32) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayFloat32", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayFloat32", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (Float32 *) buffers[0];
    tparams = (Float32 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayFloat32, CFUNC_UFUNC);

static int nonzeroCoordsFloat32(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	Float32   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsFloat32", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsFloat32", niter,
		      buffers[0], bsizes[0], sizeof(Float32)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsFloat32", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsFloat32", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (Float32 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsFloat32: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsFloat32: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsFloat32, CFUNC_UFUNC);


/****************** Float64 *******************/


void  quicksort0Float64(Float64 *pl, Float64 *pr)
{
	Float64 vp, SWAP_temp;
	Float64 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortFloat64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float64 *pl = (Float64 *) buffers[0]; 
	Float64 *pr = pl + niter - 1;

	quicksort0Float64(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortFloat64, sizeof(Float64));


void  aquicksort0Float64(long *pl, long *pr, Float64 *v)
{
	Float64 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortFloat64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float64 *v = (Float64 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0Float64(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortFloat64, sizeof(Float64), sizeof(long));


void mergesort0Float64(Float64 *pl, Float64 *pr, Float64 *pw)
{
	Float64 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0Float64(pl,pm-1,pw);
		mergesort0Float64(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortFloat64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float64 *pl = (Float64 *) buffers[0];
	Float64 *pr = pl + niter - 1;
	Float64 *pw = (Float64 *) malloc(((1 + niter/2))*sizeof(Float64));

	if (pw != NULL) {
		mergesort0Float64(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortFloat64, sizeof(Float64));


void amergesort0Float64(long *pl, long *pr, Float64 *v, long *pw)
{
        Float64 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0Float64(pl,pm-1,v,pw);
		amergesort0Float64(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortFloat64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float64 *v  = (Float64 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0Float64(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortFloat64, sizeof(Float64), sizeof(long));


void heapsort0Float64(Float64 *a, long n)
{
	Float64 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortFloat64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float64 *pl = (Float64 *) buffers[0];
        
	heapsort0Float64(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortFloat64, sizeof(Float64));


void aheapsort0Float64(long *a, long n, Float64 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortFloat64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Float64 *v = (Float64 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0Float64(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortFloat64, sizeof(Float64), sizeof(long));


void sort0Float64(Float64 *v, long left, long right)
{
  Float64 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0Float64(v, left, lastl);
  sort0Float64(v, lastr, right);
}

static int sortFloat64(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0Float64( (Float64 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortFloat64, sizeof(Float64));

void asort0Float64(Float64 *v, long *w, long left, long right)
{
  Float64 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0Float64(v, w, left, lastl);
  asort0Float64(v, w, lastr, right);
}

static int asortFloat64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0Float64( (Float64 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortFloat64, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(Float64), sizeof(long), 0, 0, 0, 0);

static long searchFloat64(long na, Float64 *a, Float64 v)
{
  long i = 0;
  Float64 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedFloat64(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  Float64 *bins;
  Float64 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedFloat64", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedFloat64", nbins,
      buffers[1], bsizes[1], sizeof(Float64)))
      return -1;
  bins    = (Float64 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedFloat64", niter,
      buffers[2], bsizes[2], sizeof(Float64)))
      return -1;
  values  = (Float64 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedFloat64", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedFloat64", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchFloat64(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedFloat64, CFUNC_UFUNC);

static int fillarrayFloat64 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    Float64 start, delta;
    Float64 *tin;
    Float64 *tparams;
    Int8 itemsizes[2] = { sizeof(Float64), sizeof(Float64) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayFloat64", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayFloat64", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (Float64 *) buffers[0];
    tparams = (Float64 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayFloat64, CFUNC_UFUNC);

static int nonzeroCoordsFloat64(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	Float64   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsFloat64", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsFloat64", niter,
		      buffers[0], bsizes[0], sizeof(Float64)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsFloat64", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsFloat64", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (Float64 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsFloat64: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsFloat64: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsFloat64, CFUNC_UFUNC);


/****************** Complex32 *******************/


void  quicksort0Complex32(Complex32 *pl, Complex32 *pr)
{
	Complex32 vp, SWAP_temp;
	Complex32 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (NUM_CLT(*pm,*pl)) SWAP(*pm,*pl);
			if (NUM_CLT(*pr,*pm)) SWAP(*pr,*pm);
			if (NUM_CLT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (NUM_CLT(*pi,vp));
				do --pj; while (NUM_CLT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && NUM_CLT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortComplex32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex32 *pl = (Complex32 *) buffers[0]; 
	Complex32 *pr = pl + niter - 1;

	quicksort0Complex32(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortComplex32, sizeof(Complex32));


void  aquicksort0Complex32(long *pl, long *pr, Complex32 *v)
{
	Complex32 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (NUM_CLT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (NUM_CLT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (NUM_CLT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (NUM_CLT(v[*pi],vp));
				do --pj; while (NUM_CLT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && NUM_CLT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortComplex32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex32 *v = (Complex32 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0Complex32(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortComplex32, sizeof(Complex32), sizeof(long));


void mergesort0Complex32(Complex32 *pl, Complex32 *pr, Complex32 *pw)
{
	Complex32 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0Complex32(pl,pm-1,pw);
		mergesort0Complex32(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (NUM_CLE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && NUM_CLT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortComplex32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex32 *pl = (Complex32 *) buffers[0];
	Complex32 *pr = pl + niter - 1;
	Complex32 *pw = (Complex32 *) malloc(((1 + niter/2))*sizeof(Complex32));

	if (pw != NULL) {
		mergesort0Complex32(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortComplex32, sizeof(Complex32));


void amergesort0Complex32(long *pl, long *pr, Complex32 *v, long *pw)
{
        Complex32 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0Complex32(pl,pm-1,v,pw);
		amergesort0Complex32(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (NUM_CLE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && NUM_CLT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortComplex32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex32 *v  = (Complex32 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0Complex32(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortComplex32, sizeof(Complex32), sizeof(long));


void heapsort0Complex32(Complex32 *a, long n)
{
	Complex32 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && NUM_CLT(a[j], a[j+1])) 
				j += 1;
			if (NUM_CLT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && NUM_CLT(a[j], a[j+1]))
				j++;
			if (NUM_CLT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortComplex32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex32 *pl = (Complex32 *) buffers[0];
        
	heapsort0Complex32(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortComplex32, sizeof(Complex32));


void aheapsort0Complex32(long *a, long n, Complex32 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && NUM_CLT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (NUM_CLT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && NUM_CLT(v[a[j]], v[a[j+1]]))
				j++;
			if (NUM_CLT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortComplex32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex32 *v = (Complex32 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0Complex32(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortComplex32, sizeof(Complex32), sizeof(long));


void sort0Complex32(Complex32 *v, long left, long right)
{
  Complex32 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (NUM_CLT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && NUM_CEQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && NUM_CEQ(v[last],v[lastr]))
     ++lastr;

  sort0Complex32(v, left, lastl);
  sort0Complex32(v, lastr, right);
}

static int sortComplex32(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0Complex32( (Complex32 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortComplex32, sizeof(Complex32));

void asort0Complex32(Complex32 *v, long *w, long left, long right)
{
  Complex32 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (NUM_CLT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && NUM_CEQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && NUM_CEQ(v[last],v[lastr]))
     ++lastr;

  asort0Complex32(v, w, left, lastl);
  asort0Complex32(v, w, lastr, right);
}

static int asortComplex32(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0Complex32( (Complex32 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortComplex32, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(Complex32), sizeof(long), 0, 0, 0, 0);

static long searchComplex32(long na, Complex32 *a, Complex32 v)
{
  long i = 0;
  Complex32 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (NUM_CLE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((NUM_CLT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedComplex32(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  Complex32 *bins;
  Complex32 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedComplex32", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedComplex32", nbins,
      buffers[1], bsizes[1], sizeof(Complex32)))
      return -1;
  bins    = (Complex32 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedComplex32", niter,
      buffers[2], bsizes[2], sizeof(Complex32)))
      return -1;
  values  = (Complex32 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedComplex32", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedComplex32", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchComplex32(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedComplex32, CFUNC_UFUNC);

static int fillarrayComplex32 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    Complex32 start, delta;
    Complex32 *tin;
    Complex32 *tparams;
    Int8 itemsizes[2] = { sizeof(Complex32), sizeof(Complex32) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayComplex32", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayComplex32", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (Complex32 *) buffers[0];
    tparams = (Complex32 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        NUM_CASS(*tin, start); NUM_CADD(start, delta, start);
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayComplex32, CFUNC_UFUNC);

static int nonzeroCoordsComplex32(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	Complex32   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsComplex32", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsComplex32", niter,
		      buffers[0], bsizes[0], sizeof(Complex32)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsComplex32", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsComplex32", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (Complex32 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsComplex32: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (((source[i].r !=0) || (source[i].i != 0)))
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsComplex32: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsComplex32, CFUNC_UFUNC);


/****************** Complex64 *******************/


void  quicksort0Complex64(Complex64 *pl, Complex64 *pr)
{
	Complex64 vp, SWAP_temp;
	Complex64 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (NUM_CLT(*pm,*pl)) SWAP(*pm,*pl);
			if (NUM_CLT(*pr,*pm)) SWAP(*pr,*pm);
			if (NUM_CLT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (NUM_CLT(*pi,vp));
				do --pj; while (NUM_CLT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && NUM_CLT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortComplex64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex64 *pl = (Complex64 *) buffers[0]; 
	Complex64 *pr = pl + niter - 1;

	quicksort0Complex64(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortComplex64, sizeof(Complex64));


void  aquicksort0Complex64(long *pl, long *pr, Complex64 *v)
{
	Complex64 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (NUM_CLT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (NUM_CLT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (NUM_CLT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (NUM_CLT(v[*pi],vp));
				do --pj; while (NUM_CLT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && NUM_CLT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortComplex64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex64 *v = (Complex64 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0Complex64(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortComplex64, sizeof(Complex64), sizeof(long));


void mergesort0Complex64(Complex64 *pl, Complex64 *pr, Complex64 *pw)
{
	Complex64 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0Complex64(pl,pm-1,pw);
		mergesort0Complex64(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (NUM_CLE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && NUM_CLT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortComplex64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex64 *pl = (Complex64 *) buffers[0];
	Complex64 *pr = pl + niter - 1;
	Complex64 *pw = (Complex64 *) malloc(((1 + niter/2))*sizeof(Complex64));

	if (pw != NULL) {
		mergesort0Complex64(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortComplex64, sizeof(Complex64));


void amergesort0Complex64(long *pl, long *pr, Complex64 *v, long *pw)
{
        Complex64 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0Complex64(pl,pm-1,v,pw);
		amergesort0Complex64(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (NUM_CLE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && NUM_CLT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortComplex64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex64 *v  = (Complex64 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0Complex64(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortComplex64, sizeof(Complex64), sizeof(long));


void heapsort0Complex64(Complex64 *a, long n)
{
	Complex64 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && NUM_CLT(a[j], a[j+1])) 
				j += 1;
			if (NUM_CLT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && NUM_CLT(a[j], a[j+1]))
				j++;
			if (NUM_CLT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortComplex64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex64 *pl = (Complex64 *) buffers[0];
        
	heapsort0Complex64(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortComplex64, sizeof(Complex64));


void aheapsort0Complex64(long *a, long n, Complex64 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && NUM_CLT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (NUM_CLT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && NUM_CLT(v[a[j]], v[a[j+1]]))
				j++;
			if (NUM_CLT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortComplex64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Complex64 *v = (Complex64 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0Complex64(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortComplex64, sizeof(Complex64), sizeof(long));


void sort0Complex64(Complex64 *v, long left, long right)
{
  Complex64 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (NUM_CLT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && NUM_CEQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && NUM_CEQ(v[last],v[lastr]))
     ++lastr;

  sort0Complex64(v, left, lastl);
  sort0Complex64(v, lastr, right);
}

static int sortComplex64(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0Complex64( (Complex64 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortComplex64, sizeof(Complex64));

void asort0Complex64(Complex64 *v, long *w, long left, long right)
{
  Complex64 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (NUM_CLT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && NUM_CEQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && NUM_CEQ(v[last],v[lastr]))
     ++lastr;

  asort0Complex64(v, w, left, lastl);
  asort0Complex64(v, w, lastr, right);
}

static int asortComplex64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0Complex64( (Complex64 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortComplex64, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(Complex64), sizeof(long), 0, 0, 0, 0);

static long searchComplex64(long na, Complex64 *a, Complex64 v)
{
  long i = 0;
  Complex64 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (NUM_CLE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((NUM_CLT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedComplex64(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  Complex64 *bins;
  Complex64 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedComplex64", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedComplex64", nbins,
      buffers[1], bsizes[1], sizeof(Complex64)))
      return -1;
  bins    = (Complex64 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedComplex64", niter,
      buffers[2], bsizes[2], sizeof(Complex64)))
      return -1;
  values  = (Complex64 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedComplex64", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedComplex64", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchComplex64(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedComplex64, CFUNC_UFUNC);

static int fillarrayComplex64 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    Complex64 start, delta;
    Complex64 *tin;
    Complex64 *tparams;
    Int8 itemsizes[2] = { sizeof(Complex64), sizeof(Complex64) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayComplex64", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayComplex64", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (Complex64 *) buffers[0];
    tparams = (Complex64 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        NUM_CASS(*tin, start); NUM_CADD(start, delta, start);
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayComplex64, CFUNC_UFUNC);

static int nonzeroCoordsComplex64(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	Complex64   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsComplex64", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsComplex64", niter,
		      buffers[0], bsizes[0], sizeof(Complex64)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsComplex64", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsComplex64", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (Complex64 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsComplex64: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (((source[i].r !=0) || (source[i].i != 0)))
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsComplex64: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsComplex64, CFUNC_UFUNC);


/****************** Int64 *******************/


void  quicksort0Int64(Int64 *pl, Int64 *pr)
{
	Int64 vp, SWAP_temp;
	Int64 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int64 *pl = (Int64 *) buffers[0]; 
	Int64 *pr = pl + niter - 1;

	quicksort0Int64(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortInt64, sizeof(Int64));


void  aquicksort0Int64(long *pl, long *pr, Int64 *v)
{
	Int64 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int64 *v = (Int64 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0Int64(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortInt64, sizeof(Int64), sizeof(long));


void mergesort0Int64(Int64 *pl, Int64 *pr, Int64 *pw)
{
	Int64 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0Int64(pl,pm-1,pw);
		mergesort0Int64(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int64 *pl = (Int64 *) buffers[0];
	Int64 *pr = pl + niter - 1;
	Int64 *pw = (Int64 *) malloc(((1 + niter/2))*sizeof(Int64));

	if (pw != NULL) {
		mergesort0Int64(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortInt64, sizeof(Int64));


void amergesort0Int64(long *pl, long *pr, Int64 *v, long *pw)
{
        Int64 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0Int64(pl,pm-1,v,pw);
		amergesort0Int64(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int64 *v  = (Int64 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0Int64(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortInt64, sizeof(Int64), sizeof(long));


void heapsort0Int64(Int64 *a, long n)
{
	Int64 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int64 *pl = (Int64 *) buffers[0];
        
	heapsort0Int64(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortInt64, sizeof(Int64));


void aheapsort0Int64(long *a, long n, Int64 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	Int64 *v = (Int64 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0Int64(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortInt64, sizeof(Int64), sizeof(long));


void sort0Int64(Int64 *v, long left, long right)
{
  Int64 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0Int64(v, left, lastl);
  sort0Int64(v, lastr, right);
}

static int sortInt64(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0Int64( (Int64 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortInt64, sizeof(Int64));

void asort0Int64(Int64 *v, long *w, long left, long right)
{
  Int64 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0Int64(v, w, left, lastl);
  asort0Int64(v, w, lastr, right);
}

static int asortInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0Int64( (Int64 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortInt64, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(Int64), sizeof(long), 0, 0, 0, 0);

static long searchInt64(long na, Int64 *a, Int64 v)
{
  long i = 0;
  Int64 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedInt64(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  Int64 *bins;
  Int64 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedInt64", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedInt64", nbins,
      buffers[1], bsizes[1], sizeof(Int64)))
      return -1;
  bins    = (Int64 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedInt64", niter,
      buffers[2], bsizes[2], sizeof(Int64)))
      return -1;
  values  = (Int64 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedInt64", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedInt64", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchInt64(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedInt64, CFUNC_UFUNC);

static int fillarrayInt64 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    Int64 start, delta;
    Int64 *tin;
    Int64 *tparams;
    Int8 itemsizes[2] = { sizeof(Int64), sizeof(Int64) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayInt64", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayInt64", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (Int64 *) buffers[0];
    tparams = (Int64 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayInt64, CFUNC_UFUNC);

static int nonzeroCoordsInt64(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	Int64   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsInt64", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsInt64", niter,
		      buffers[0], bsizes[0], sizeof(Int64)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsInt64", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsInt64", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (Int64 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsInt64: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsInt64: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsInt64, CFUNC_UFUNC);


/****************** UInt64 *******************/


void  quicksort0UInt64(UInt64 *pl, UInt64 *pr)
{
	UInt64 vp, SWAP_temp;
	UInt64 *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			if (STDC_LT(*pr,*pm)) SWAP(*pr,*pm);
			if (STDC_LT(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(*pi,vp));
				do --pj; while (STDC_LT(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int quicksortUInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt64 *pl = (UInt64 *) buffers[0]; 
	UInt64 *pr = pl + niter - 1;

	quicksort0UInt64(pl, pr);
	return 0;
}

UFUNC_DESCR1(quicksortUInt64, sizeof(UInt64));


void  aquicksort0UInt64(long *pl, long *pr, UInt64 *v)
{
	UInt64 vp;
	long SWAP_temp;
	long *stack[100], **sptr = stack, *pm, *pi, *pj, *pt, vi;
        
	for(;;) {	
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			if (STDC_LT(v[*pr],v[*pm])) SWAP(*pr,*pm);
			if (STDC_LT(v[*pm],v[*pl])) SWAP(*pm,*pl);
			vp = v[*pm];
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (STDC_LT(v[*pi],vp));
				do --pj; while (STDC_LT(vp,v[*pj]));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pt = pi - 1; pj > pl && STDC_LT(vp, v[*pt]);) {
				*pj-- = *pt--;
			}
			*pj = vi;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

static int aquicksortUInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt64 *v = (UInt64 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;

	aquicksort0UInt64(pl, pr, v);
	return 0;
}

UFUNC_DESCR2(aquicksortUInt64, sizeof(UInt64), sizeof(long));


void mergesort0UInt64(UInt64 *pl, UInt64 *pr, UInt64 *pw)
{
	UInt64 vp, *pi, *pj, *pk, *pm;
	
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		mergesort0UInt64(pl,pm-1,pw);
		mergesort0UInt64(pm,pr,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(*pk,*pj)) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, *pk); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vp;
		}
        }
}

static int mergesortUInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt64 *pl = (UInt64 *) buffers[0];
	UInt64 *pr = pl + niter - 1;
	UInt64 *pw = (UInt64 *) malloc(((1 + niter/2))*sizeof(UInt64));

	if (pw != NULL) {
		mergesort0UInt64(pl, pr, pw);
		free(pw);
		return 0;
	}else{
		return -1;
	}
}

UFUNC_DESCR1(mergesortUInt64, sizeof(UInt64));


void amergesort0UInt64(long *pl, long *pr, UInt64 *v, long *pw)
{
        UInt64 vp;
	long vi, *pi, *pj, *pk, *pm;
        
	if (pr - pl > 20) {
		/* merge sort */
		pm = pl + ((pr - pl + 1)>>1);
		amergesort0UInt64(pl,pm-1,v,pw);
		amergesort0UInt64(pm,pr,v,pw);
		for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
			*pi = *pj;
		}
		for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
			if (STDC_LE(v[*pk],v[*pj])) {
				*pm = *pk;
				++pk;
			}else{
				*pm = *pj;
				++pj;
			}	
		}
		for(; pk < pi; ++pm, ++pk) {
			*pm = *pk;
		}
	}else{
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vi = *pi;
			vp = v[vi];
			for(pj = pi, pk = pi - 1; pj > pl && STDC_LT(vp, v[*pk]); --pj, --pk) {
				*pj = *pk;
			}
			*pj = vi;
		}
	}
}

static int amergesortUInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt64 *v  = (UInt64 *) buffers[0];
	long *pl = (long *) buffers[1]; 
	long *pr = pl + niter - 1;
	long *pw = (long *) malloc(((1 + niter/2))*sizeof(long));

	if (pw != NULL) {
		amergesort0UInt64(pl, pr, v, pw);
		free(pw);
		return 0;
	}else{
		return -1;
       }
}

UFUNC_DESCR2(amergesortUInt64, sizeof(UInt64), sizeof(long));


void heapsort0UInt64(UInt64 *a, long n)
{
	UInt64 tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1])) 
				j += 1;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(a[j], a[j+1]))
				j++;
			if (STDC_LT(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int heapsortUInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt64 *pl = (UInt64 *) buffers[0];
        
	heapsort0UInt64(pl, niter);
	return 0;
}

UFUNC_DESCR1(heapsortUInt64, sizeof(UInt64));


void aheapsort0UInt64(long *a, long n, UInt64 *v)
{
	long i,j,l, tmp;

	/* The arrays need to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]])) 
				j += 1;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && STDC_LT(v[a[j]], v[a[j+1]]))
				j++;
			if (STDC_LT(v[tmp], v[a[j]])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}

static int aheapsortUInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
	UInt64 *v = (UInt64 *) buffers[0];
	long *pl = (long *) buffers[1]; 

	aheapsort0UInt64(pl, niter, v);
	return 0;
}

UFUNC_DESCR2(aheapsortUInt64, sizeof(UInt64), sizeof(long));


void sort0UInt64(UInt64 *v, long left, long right)
{
  UInt64 temp;
  long i, last, lastl, lastr, diff;

  diff = right - left;
  if (diff <= 0)
     return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  /* Split array into values < pivot, and values > pivot. */
  swap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      swap(last, i);
    }
  swap(left, last);
  
  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  sort0UInt64(v, left, lastl);
  sort0UInt64(v, lastr, right);
}

static int sortUInt64(long niter, long ninargs, long noutargs,
           void **buffers, long *bsizes)
{
    BEGIN_THREADS
    sort0UInt64( (UInt64 *) buffers[0], 0, niter-1);
    END_THREADS
    return 0;
}

UFUNC_DESCR1(sortUInt64, sizeof(UInt64));

void asort0UInt64(UInt64 *v, long *w, long left, long right)
{
  UInt64 temp;
  long  wtemp, i, last, lastl, lastr;

  if (left >= right)
    return;

  /* choose the pivot randomly. */
  i = left + (int) (((right-left)*1.0*rand())/(RAND_MAX+1.0));

  wswap(left, i);
  last = left;
  for(i=left+1; i<=right; i++)
    if (STDC_LT(v[i], v[left]))
    {
      ++ last;
      wswap(last, i);
    }
  wswap(left, last);

  lastl = last-1;
  lastr = last+1;

  /* Exclude values equal to pivot from recursion */
  while((left < lastl) && STDC_EQ(v[last],v[lastl]))
     --lastl;
  while((lastr < right) && STDC_EQ(v[last],v[lastr]))
     ++lastr;

  asort0UInt64(v, w, left, lastl);
  asort0UInt64(v, w, lastr, right);
}

static int asortUInt64(long niter, long ninargs, long noutargs, void **buffers, long *bsizes)
{
    BEGIN_THREADS
    asort0UInt64( (UInt64 *) buffers[0], (long *) buffers[1],
       0, niter-1);
    END_THREADS
    return 0;
}

CFUNC_DESCR(asortUInt64, CFUNC_UFUNC, CHECK_ALIGN, 0, 2, sizeof(UInt64), sizeof(long), 0, 0, 0, 0);

static long searchUInt64(long na, UInt64 *a, UInt64 v)
{
  long i = 0;
  UInt64 *b;
  while(na > 10)
    {
      long mid = na>>1;
      if (STDC_LE(v, a[mid]))
        na = mid;
      else
        {
          i += mid;
          a += mid;
          na -= mid;
        }
    }
  b = a;
  while((STDC_LT(*b, v)) && na--)
    ++ b;
  return i+(b-a);
}

static int searchsortedUInt64(int niter, int ninargs, int noutargs,
          void **buffers, long *bsizes)
{
  maybelong   nbins;   
  UInt64 *bins;
  UInt64 *values;
  long       *indices;
  maybelong i;
  
  if (NA_checkOneCBuffer("searchsortedUInt64", 1,
       buffers[0], bsizes[0], sizeof(maybelong)))
       return -1;
  nbins    = *(maybelong *)  buffers[0];

  if (NA_checkOneCBuffer("searchsortedUInt64", nbins,
      buffers[1], bsizes[1], sizeof(UInt64)))
      return -1;
  bins    = (UInt64 *) buffers[1];

  if (NA_checkOneCBuffer("searchsortedUInt64", niter,
      buffers[2], bsizes[2], sizeof(UInt64)))
      return -1;
  values  = (UInt64 *) buffers[2];

  if (NA_checkOneCBuffer("searchsortedUInt64", niter,
      buffers[3], bsizes[3], sizeof(long)))
      return -1;
  indices = (long *)   buffers[3];

  if (NA_checkIo("searchsortedUInt64", 3, 1, ninargs, noutargs))
     return -1;

  BEGIN_THREADS
  for(i=0; i<niter; i++)
     indices[i] = searchUInt64(nbins, bins, values[i]);
  END_THREADS
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(searchsortedUInt64, CFUNC_UFUNC);

static int fillarrayUInt64 (long niter, long ninargs, long noutargs, 
			     void **buffers, long *bsizes) {
    long i;
    UInt64 start, delta;
    UInt64 *tin;
    UInt64 *tparams;
    Int8 itemsizes[2] = { sizeof(UInt64), sizeof(UInt64) };
    Int8 iters[2] = { 0, 2 };

    if (NA_checkIo("fillarrayUInt64", 1, 1, ninargs, noutargs) ||
	NA_checkNCBuffers("fillarrayUInt64", 2,
			 niter, buffers, bsizes, itemsizes, iters))
	    return -1;

    tin = (UInt64 *) buffers[0];
    tparams = (UInt64 *) buffers[1];
    start = tparams[0];
    delta = tparams[1];

    BEGIN_THREADS
    for (i=0; i<niter; i++, tin++) {
        *tin = start;  start += delta;
    }
    END_THREADS
    return 0;
}

SELF_CHECKED_CFUNC_DESCR(fillarrayUInt64, CFUNC_UFUNC);

static int nonzeroCoordsUInt64(long niter, long ninargs, long noutargs,
                               void **buffers, long *bsizes)
{
	UInt64   *source;
	maybelong  *sstride;
	long  **output;
	long i, minbsize, next = 0;
	
	if (NA_checkIo("nonzeroCoordsUInt64", 2, noutargs, ninargs, noutargs))
		return -1;

	if (NA_checkOneCBuffer("nonzeroCoordsUInt64", niter,
		      buffers[0], bsizes[0], sizeof(UInt64)))
		return -1;
	    
	if (NA_checkOneCBuffer("nonzeroCoordsUInt64", noutargs,
		      buffers[1], bsizes[1], sizeof(maybelong)))
		return -1;

	/* Check alignment only.  Storage optimization makes numarray
	 shorter than niter likely, possibly even 0! */
	for (i=0; i<noutargs; i++)
		if (NA_checkOneCBuffer("nonzeroCoordsUInt64", 0,
			      buffers[2+i], bsizes[2+i], sizeof(long)))
			return -1;

	source     = (UInt64 *)  buffers[0];
	sstride    = (maybelong  *)  buffers[1];
	output     = (long **) &buffers[2];

	minbsize = bsizes[0];
	for(i=0; i<noutargs; i++)
		if (sstride[i] <= 0) {
			PyErr_Format(PyExc_ValueError,
			     "nonzeroCoordsUInt64: bad stride[%ld].\n",
				     i);
			return -1;
		}
		else if (bsizes[i+2] < minbsize)
			minbsize = bsizes[i+2];
	
	next = 0;
	for(i=0; i<niter; i++)
	{
		if (source[i] != 0)
		{
			long j, index = i;
			if (next >= minbsize) {
				PyErr_Format(PyExc_ValueError,
					     "nonzeroCoordsUInt64: insufficient index space.\n");
				return -1;
			}
			for(j=0; j<noutargs; j++)
			{
				output[j][next] = index / sstride[j];
                                index %= sstride[j];
			}
			++ next;
		}
	}
	return 0;
}

SELF_CHECKED_CFUNC_DESCR(nonzeroCoordsUInt64, CFUNC_UFUNC);


static PyMethodDef _sortMethods[] = {

	{NULL,      NULL}        /* Sentinel */
};

static PyObject *init_funcDict(void) {
    PyObject *dict;
    dict = PyDict_New();
    NA_add_cfunc(dict, "('Bool', 'sort')", (void *) &sortBool_descr);
    NA_add_cfunc(dict, "('Bool', 'asort')", (void *) &asortBool_descr);
    NA_add_cfunc(dict, "('Bool', 'searchsorted')", (void *) &searchsortedBool_descr);
    NA_add_cfunc(dict, "('Bool', 'fillarray')", (void *) &fillarrayBool_descr);
    NA_add_cfunc(dict, "('Bool', 'nonzeroCoords')", (void *) &nonzeroCoordsBool_descr);
    NA_add_cfunc(dict, "('Bool', 'quicksort')", (void *) &quicksortBool_descr);
    NA_add_cfunc(dict, "('Bool', 'aquicksort')", (void *) &aquicksortBool_descr);
    NA_add_cfunc(dict, "('Bool', 'mergesort')", (void *) &mergesortBool_descr);
    NA_add_cfunc(dict, "('Bool', 'amergesort')", (void *) &amergesortBool_descr);
    NA_add_cfunc(dict, "('Bool', 'heapsort')", (void *) &heapsortBool_descr);
    NA_add_cfunc(dict, "('Bool', 'aheapsort')", (void *) &aheapsortBool_descr);
    NA_add_cfunc(dict, "('Int8', 'sort')", (void *) &sortInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'asort')", (void *) &asortInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'searchsorted')", (void *) &searchsortedInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'fillarray')", (void *) &fillarrayInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'nonzeroCoords')", (void *) &nonzeroCoordsInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'quicksort')", (void *) &quicksortInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'aquicksort')", (void *) &aquicksortInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'mergesort')", (void *) &mergesortInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'amergesort')", (void *) &amergesortInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'heapsort')", (void *) &heapsortInt8_descr);
    NA_add_cfunc(dict, "('Int8', 'aheapsort')", (void *) &aheapsortInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'sort')", (void *) &sortUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'asort')", (void *) &asortUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'searchsorted')", (void *) &searchsortedUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'fillarray')", (void *) &fillarrayUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'nonzeroCoords')", (void *) &nonzeroCoordsUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'quicksort')", (void *) &quicksortUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'aquicksort')", (void *) &aquicksortUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'mergesort')", (void *) &mergesortUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'amergesort')", (void *) &amergesortUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'heapsort')", (void *) &heapsortUInt8_descr);
    NA_add_cfunc(dict, "('UInt8', 'aheapsort')", (void *) &aheapsortUInt8_descr);
    NA_add_cfunc(dict, "('Int16', 'sort')", (void *) &sortInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'asort')", (void *) &asortInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'searchsorted')", (void *) &searchsortedInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'fillarray')", (void *) &fillarrayInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'nonzeroCoords')", (void *) &nonzeroCoordsInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'quicksort')", (void *) &quicksortInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'aquicksort')", (void *) &aquicksortInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'mergesort')", (void *) &mergesortInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'amergesort')", (void *) &amergesortInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'heapsort')", (void *) &heapsortInt16_descr);
    NA_add_cfunc(dict, "('Int16', 'aheapsort')", (void *) &aheapsortInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'sort')", (void *) &sortUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'asort')", (void *) &asortUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'searchsorted')", (void *) &searchsortedUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'fillarray')", (void *) &fillarrayUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'nonzeroCoords')", (void *) &nonzeroCoordsUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'quicksort')", (void *) &quicksortUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'aquicksort')", (void *) &aquicksortUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'mergesort')", (void *) &mergesortUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'amergesort')", (void *) &amergesortUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'heapsort')", (void *) &heapsortUInt16_descr);
    NA_add_cfunc(dict, "('UInt16', 'aheapsort')", (void *) &aheapsortUInt16_descr);
    NA_add_cfunc(dict, "('Int32', 'sort')", (void *) &sortInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'asort')", (void *) &asortInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'searchsorted')", (void *) &searchsortedInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'fillarray')", (void *) &fillarrayInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'nonzeroCoords')", (void *) &nonzeroCoordsInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'quicksort')", (void *) &quicksortInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'aquicksort')", (void *) &aquicksortInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'mergesort')", (void *) &mergesortInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'amergesort')", (void *) &amergesortInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'heapsort')", (void *) &heapsortInt32_descr);
    NA_add_cfunc(dict, "('Int32', 'aheapsort')", (void *) &aheapsortInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'sort')", (void *) &sortUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'asort')", (void *) &asortUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'searchsorted')", (void *) &searchsortedUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'fillarray')", (void *) &fillarrayUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'nonzeroCoords')", (void *) &nonzeroCoordsUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'quicksort')", (void *) &quicksortUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'aquicksort')", (void *) &aquicksortUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'mergesort')", (void *) &mergesortUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'amergesort')", (void *) &amergesortUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'heapsort')", (void *) &heapsortUInt32_descr);
    NA_add_cfunc(dict, "('UInt32', 'aheapsort')", (void *) &aheapsortUInt32_descr);
    NA_add_cfunc(dict, "('Float32', 'sort')", (void *) &sortFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'asort')", (void *) &asortFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'searchsorted')", (void *) &searchsortedFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'fillarray')", (void *) &fillarrayFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'nonzeroCoords')", (void *) &nonzeroCoordsFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'quicksort')", (void *) &quicksortFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'aquicksort')", (void *) &aquicksortFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'mergesort')", (void *) &mergesortFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'amergesort')", (void *) &amergesortFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'heapsort')", (void *) &heapsortFloat32_descr);
    NA_add_cfunc(dict, "('Float32', 'aheapsort')", (void *) &aheapsortFloat32_descr);
    NA_add_cfunc(dict, "('Float64', 'sort')", (void *) &sortFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'asort')", (void *) &asortFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'searchsorted')", (void *) &searchsortedFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'fillarray')", (void *) &fillarrayFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'nonzeroCoords')", (void *) &nonzeroCoordsFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'quicksort')", (void *) &quicksortFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'aquicksort')", (void *) &aquicksortFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'mergesort')", (void *) &mergesortFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'amergesort')", (void *) &amergesortFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'heapsort')", (void *) &heapsortFloat64_descr);
    NA_add_cfunc(dict, "('Float64', 'aheapsort')", (void *) &aheapsortFloat64_descr);
    NA_add_cfunc(dict, "('Complex32', 'sort')", (void *) &sortComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'asort')", (void *) &asortComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'searchsorted')", (void *) &searchsortedComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'fillarray')", (void *) &fillarrayComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'nonzeroCoords')", (void *) &nonzeroCoordsComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'quicksort')", (void *) &quicksortComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'aquicksort')", (void *) &aquicksortComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'mergesort')", (void *) &mergesortComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'amergesort')", (void *) &amergesortComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'heapsort')", (void *) &heapsortComplex32_descr);
    NA_add_cfunc(dict, "('Complex32', 'aheapsort')", (void *) &aheapsortComplex32_descr);
    NA_add_cfunc(dict, "('Complex64', 'sort')", (void *) &sortComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'asort')", (void *) &asortComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'searchsorted')", (void *) &searchsortedComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'fillarray')", (void *) &fillarrayComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'nonzeroCoords')", (void *) &nonzeroCoordsComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'quicksort')", (void *) &quicksortComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'aquicksort')", (void *) &aquicksortComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'mergesort')", (void *) &mergesortComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'amergesort')", (void *) &amergesortComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'heapsort')", (void *) &heapsortComplex64_descr);
    NA_add_cfunc(dict, "('Complex64', 'aheapsort')", (void *) &aheapsortComplex64_descr);
    NA_add_cfunc(dict, "('Int64', 'sort')", (void *) &sortInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'asort')", (void *) &asortInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'searchsorted')", (void *) &searchsortedInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'fillarray')", (void *) &fillarrayInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'nonzeroCoords')", (void *) &nonzeroCoordsInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'quicksort')", (void *) &quicksortInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'aquicksort')", (void *) &aquicksortInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'mergesort')", (void *) &mergesortInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'amergesort')", (void *) &amergesortInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'heapsort')", (void *) &heapsortInt64_descr);
    NA_add_cfunc(dict, "('Int64', 'aheapsort')", (void *) &aheapsortInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'sort')", (void *) &sortUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'asort')", (void *) &asortUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'searchsorted')", (void *) &searchsortedUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'fillarray')", (void *) &fillarrayUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'nonzeroCoords')", (void *) &nonzeroCoordsUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'quicksort')", (void *) &quicksortUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'aquicksort')", (void *) &aquicksortUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'mergesort')", (void *) &mergesortUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'amergesort')", (void *) &amergesortUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'heapsort')", (void *) &heapsortUInt64_descr);
    NA_add_cfunc(dict, "('UInt64', 'aheapsort')", (void *) &aheapsortUInt64_descr);
    return dict;
}

/* platform independent*/
#ifdef MS_WIN32
__declspec(dllexport)
#endif
void init_sort(void) {
    PyObject *m, *d, *functions;
    m = Py_InitModule("_sort", _sortMethods);
    d = PyModule_GetDict(m);
    import_libnumarray();
    functions = init_funcDict();
    PyDict_SetItemString(d, "functionDict", functions);
    Py_DECREF(functions);
    ADD_VERSION(m);
}


syntax highlighted by Code2HTML, v. 0.9.1