/*   W     W   AAA   RRRR   N   N  III  N   N   GGG   !!!
**   W     W  A   A  R   R  NN  N   I   NN  N  G   G  !!!
**   W  W  W  AAAAA  RRRR   N N N   I   N N N  G       !
**    W W W   A   A  R   R  N  NN   I   N  NN  G  GG
**     W W    A   A  R   R  N   N  III  N   N   GGG   !!!
**
** WARNING: This file is program generated by codegenerator.py.
**
** DO NOT EDIT THIS FILE! Any changes made to this file will be lost!
*/

#include <Python.h>
#include <stdio.h>
#include "libnumarray.h"

#include <assert.h>

/* Copy N bytes from i to o using memcpy.  N is not a macro parameter: it is
** taken from whatever N is in scope at the point where the macro expands. */
#define NA_ACOPYN(i, o) memcpy(o, i, N)

/* copyNbytes: strided copy of elements that are nbytes wide.
**
** Each element is moved one byte at a time, so the routine makes no
** alignment assumptions about either buffer.
**
**   dim          dimension handled by this call; dimension 0 copies the
**                elements, higher dimensions recurse into dim-1
**   nbytes       size of one element in bytes
**   niters       iteration count for each dimension
**   input        base of the source buffer
**   inboffset    byte offset into the source buffer
**   inbstrides   source byte stride for each dimension
**   output       base of the destination buffer
**   outboffset   byte offset into the destination buffer
**   outbstrides  destination byte stride for each dimension
**
** Always returns 0.
*/

static int copyNbytes(long dim, long nbytes, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n, b;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: one recursive call per index, with the
        ** offsets advanced by that index times the stride. */
        for (n = 0; n < niters[dim]; n++) {
            copyNbytes(dim-1, nbytes, niters,
                input,  inboffset  + n*inbstrides[dim],  inbstrides,
                output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: copy each element byte by byte, lowest
    ** address first, then step both cursors by their strides. */
    n = 0;
    while (n < niters[0]) {
        for (b = 0; b < nbytes; b++)
            dst[b] = src[b];
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(copyNbytes, !CHECK_ALIGN, -1, -1);

/* Copy a data buffer to a new string
**
** Arguments:
**
**   Tuple of iteration values for each dimension of input array.
**   Input buffer object.
**   Input byte offset.
**   Tuple of input byte strides.
**   Size of input data item in bytes.
**
** Returns a new Python string holding the elements packed contiguously,
** or NULL with an exception set on failure.  An empty string is returned
** when any dimension is zero.  An empty shape (rank 0) is treated as a
** single element.
*/

static PyObject *copyToString(PyObject *self, PyObject *args) {
    PyObject *inbuffObj;
    PyObject *nitersObj, *inbstridesObj;
    PyObject *otemp, *outstring;
    long ltemp;

    int nniters, ninbstrides;
    long nbytes;
    maybelong niters[MAXDIM], inbstrides[MAXDIM], outbstrides[MAXDIM];
    void *inbuffer, *outbuffer;
    long i, inbsize, outbsize, nelements=1, inboffset;

    if (!PyArg_ParseTuple(args, "OOlOl",
            &nitersObj, &inbuffObj,  &inboffset, &inbstridesObj, &nbytes))
        return NULL;

    if (!PySequence_Check(nitersObj))
        return PyErr_Format(PyExc_TypeError,
                   "copyToString: invalid shape object");
    if (!PySequence_Check(inbstridesObj))
        return PyErr_Format(PyExc_TypeError,
                   "copyToString: invalid strides object");

    nniters = PyObject_Length(nitersObj);
    ninbstrides = PyObject_Length(inbstridesObj);
    if (nniters < 0 || ninbstrides < 0)
        return NULL;            /* length query failed; exception is set */
    if (nniters != ninbstrides)
        return PyErr_Format(PyExc_ValueError,
        "copyToString: shape & strides don't match");
    /* The shape and stride arrays below hold at most MAXDIM entries. */
    if (nniters > MAXDIM)
        return PyErr_Format(PyExc_ValueError,
        "copyToString: too many dimensions");

    /* The Python sequences are stored reversed: their last item ends up
    ** at index 0 of the C arrays. */
    for (i=nniters-1; i>=0; i--) {
        otemp = PySequence_GetItem(nitersObj, i);
        if (!otemp)
            return NULL;
        if (PyInt_Check(otemp))
           ltemp = PyInt_AsLong(otemp);
        else if (PyLong_Check(otemp))
           ltemp = PyLong_AsLong(otemp);
        else {
           Py_DECREF(otemp);    /* do not leak the item on the error path */
           return PyErr_Format(PyExc_TypeError,
                 "copyToString: non-integer shape element");
        }
        Py_DECREF(otemp);
        if (ltemp == -1 && PyErr_Occurred())
            return NULL;        /* conversion failed (e.g. overflow) */
        nelements *= ltemp;
        niters[nniters-i-1] = ltemp;

        otemp = PySequence_GetItem(inbstridesObj, i);
        if (!otemp)
            return NULL;
        if (PyInt_Check(otemp))
            ltemp = PyInt_AsLong(otemp);
        else if (PyLong_Check(otemp))
            ltemp = PyLong_AsLong(otemp);
        else {
           Py_DECREF(otemp);
           return PyErr_Format(PyExc_TypeError,
                 "copyToString: non-integer stride element");
        }
        Py_DECREF(otemp);
        if (ltemp == -1 && PyErr_Occurred())
            return NULL;
        inbstrides[nniters-i-1] = ltemp;
    }
    if (!nelements)
       return PyString_FromStringAndSize("", 0);

    if (nniters == 0) {
        /* An empty shape describes exactly one element.  Express it as a
        ** one-dimensional array of length 1 so that the code below never
        ** indexes the arrays at -1. */
        nniters = 1;
        niters[0] = 1;
        inbstrides[0] = nbytes;
    }

    /* Contiguous output strides: each one is the previous stride times
    ** the previous dimension's length. */
    outbstrides[0] = nbytes;
    for (i=1; i<nniters; i++) {
        outbstrides[i] = outbstrides[i-1]*niters[i-1];
    }
    outbsize = outbstrides[nniters-1]*niters[nniters-1];
    outstring = PyString_FromStringAndSize(NULL, outbsize);
    if (!outstring)
        return NULL;
    outbuffer = (void *) PyString_AsString(outstring);

    if ((inbsize = NA_getBufferPtrAndSize(inbuffObj, 1, &inbuffer)) < 0) {
       Py_DECREF(outstring);    /* release the result on every error path */
       return PyErr_Format(PyExc_TypeError,
           "copyToString: Problem with array buffer");
    }

    if (NA_checkOneStriding("copyToString", nniters, niters,
                          inboffset, inbstrides, inbsize, nbytes, 0) ||
        NA_checkOneStriding("copyToString", nniters, niters,
                          0, outbstrides, outbsize, nbytes, 0)) {
        Py_DECREF(outstring);
        return NULL;
    }

    BEGIN_THREADS
    copyNbytes(nniters-1, nbytes, niters,
          inbuffer, inboffset, inbstrides, outbuffer, 0, outbstrides);
    END_THREADS
    return outstring;
}

/* chooseXbytes functions are called as uFuncs... */

enum CLIP_MODE {
  CLIPPED,
  WRAPPED,
  RAISE
};

/* Fold index i into the range [0, max) by repeatedly adding or subtracting
** max.  i must be a modifiable lvalue and is evaluated several times.
** The body is wrapped in do { } while (0) so the macro expands to a single
** statement and is safe in an unbraced if/else; the arguments are
** parenthesised so expressions can be passed.
** max must be positive, otherwise the loops may never terminate. */
#define wrap(i, max)                           \
    do {                                       \
        while ((i) < 0)      (i) += (max);     \
        while ((i) >= (max)) (i) -= (max);     \
    } while (0)

/* takeNbytes: gather niter elements of N bytes each from a strided
** ("scattered") array into a contiguous output buffer.
**
** Buffer layout (ninargs inputs followed by the output):
**   buffers[0]             two maybelong values: clip mode (CLIP_MODE), N
**   buffers[1]             scattered source data
**   buffers[2]             source byte strides, one per index array
**   buffers[3]             source shape, one per index array
**   buffers[4..ninargs-1]  index arrays, niter maybelong values each
**   buffers[outi]          gathered output, niter*N bytes
**
** Out-of-range indices are handled according to the clip mode.  In RAISE
** mode a negative index gets the dimension length added once before the
** range check.
**
** Returns 0 on success and -1 on failure.
*/
static int takeNbytes(long niter, long ninargs, long noutargs,
                         void **buffers, long *bsizes)
{
  maybelong  i, cMode, N;
  maybelong *scatteredstrides, *scatteredshape, **indices;
  char  *gathered, *scattered;
  maybelong nindices = ninargs-4, outi = ninargs+noutargs-1;

  if (NA_checkIo("takeNbytes", 4, 1, MIN(ninargs, 4), noutargs))
     return -1;

  if (nindices == 0)
     return 0;

  if (NA_checkOneCBuffer("takeNbytes", 2, buffers[0], bsizes[0], sizeof(maybelong)))
     return -1;
  cMode = ((maybelong *) buffers[0])[0];
  N     = ((maybelong *) buffers[0])[1];

  if (NA_checkOneCBuffer("takeNbytes", nindices, buffers[2], bsizes[2], sizeof(maybelong)))
     return -1;
  scatteredstrides = (maybelong *) buffers[2];

  if (NA_checkOneCBuffer("takeNbytes", nindices, buffers[3], bsizes[3], sizeof(maybelong)))
     return -1;
  scatteredshape = (maybelong *) buffers[3];

  if (NA_checkOneStriding("takeNBytes", nindices, scatteredshape, 0, scatteredstrides, bsizes[1], N, 0))
     return -1;
  scattered = (char *) buffers[1];

  /* The index arrays occupy buffers[4] .. buffers[ninargs-1]; validate
  ** every one of them, not just the first nindices-4. */
  for(i=4; i<ninargs; i++)
     if (NA_checkOneCBuffer("takeNbytes", niter, buffers[i], bsizes[i], sizeof(maybelong)))
        return -1;
  indices = (maybelong **) &buffers[4];

  if (NA_checkOneCBuffer("takeNbytes", niter*N, buffers[outi], bsizes[outi], 1))
     return -1;
  gathered = (char *) buffers[outi];

  /* No index is valid for an empty dimension: wrapping would never
  ** terminate and clipping would produce index -1. */
  if (niter > 0)
    for(i=0; i<nindices; i++)
      if (scatteredshape[i] <= 0) {
         PyErr_Format(PyExc_IndexError, "Index out of range");
         return -1;
      }

  switch( cMode )
    {
    case WRAPPED:
      for(i=0; i<niter; i++)
        {
          maybelong j, index;
          for(j=index=0; j<nindices; j++)
            {
              maybelong k = indices[j][i];
              wrap(k, scatteredshape[j]);
              index += scatteredstrides[j]*k;
            }
          memcpy( &gathered[i*N], scattered+index, N);
        }
      break;
    case RAISE:
      for(i=0; i<niter; i++)
        {
          maybelong j, index;
          for(j=index=0; j<nindices; j++)
            {
              maybelong k = indices[j][i];
              if (k < 0)
                k += scatteredshape[j];
              /* Still negative after one wrap-around is out of range too. */
              if (k < 0 || k >= scatteredshape[j]) {
                 PyErr_Format(PyExc_IndexError, "Index out of range");
                 return -1;
              }
              index += scatteredstrides[j]*k;
            }
          memcpy( &gathered[i*N], scattered+index, N);
        }
      break;
    case CLIPPED:
    default:
      for(i=0; i<niter; i++)
        {
          maybelong j, index;
          for(j=index=0; j<nindices; j++)
            {
              maybelong k = indices[j][i];
              if (k < 0)
                k = 0;
              else if (k >= scatteredshape[j])
                k = scatteredshape[j]-1;
              index += scatteredstrides[j]*k;
            }
          memcpy( &gathered[i*N], scattered+index, N);
        }
      break;
    }
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(takeNbytes, CFUNC_UFUNC);

/* putNbytes: scatter niter elements of N bytes each from a contiguous
** ("gathered") buffer into a strided output array.
**
** Buffer layout (ninargs inputs followed by the output):
**   buffers[0]             two maybelong values: clip mode (CLIP_MODE), N
**   buffers[1]             gathered source data, niter*N bytes
**   buffers[2]             destination byte strides, one per index array
**   buffers[3]             destination shape, one per index array
**   buffers[4..ninargs-1]  index arrays, niter maybelong values each
**   buffers[outi]          scattered destination data
**
** Out-of-range indices are handled according to the clip mode.  In RAISE
** mode a negative index gets the dimension length added once before the
** range check.
**
** Returns 0 on success and -1 on failure.
*/
static int putNbytes(long niter, long ninargs, long noutargs,
                         void **buffers, long *bsizes)
{
  maybelong  i, cMode, N;
  maybelong  *scatteredstrides, *scatteredshape, **indices;
  char   *gathered, *scattered;
  long nindices = ninargs-4, outi = ninargs+noutargs-1;

  if (nindices == 0)
     return 0;

  if (NA_checkIo("putNbytes", 4, 1, MIN(ninargs, 4), noutargs))
     return -1;

  if (NA_checkOneCBuffer("putNbytes", 2, buffers[0], bsizes[0], sizeof(maybelong)))
     return -1;
  cMode = ((maybelong *) buffers[0])[0];
  N     = ((maybelong *) buffers[0])[1];

  if (NA_checkOneCBuffer("putNbytes", niter*N, buffers[1], bsizes[1], 1))
     return -1;
  gathered = (char *) buffers[1];

  if (NA_checkOneCBuffer("putNbytes", nindices, buffers[2], bsizes[2], sizeof(maybelong)))
     return -1;
  scatteredstrides = (maybelong *) buffers[2];

  if (NA_checkOneCBuffer("putNbytes", nindices, buffers[3], bsizes[3], sizeof(maybelong)))
     return -1;
  scatteredshape = (maybelong *) buffers[3];

  /* The index arrays occupy buffers[4] .. buffers[ninargs-1]; validate
  ** every one of them, not just the first nindices-4. */
  for(i=4; i<ninargs; i++)
     if (NA_checkOneCBuffer("putNbytes", niter, buffers[i], bsizes[i], sizeof(maybelong)))
        return -1;
  indices = (maybelong **) &buffers[4];

  if (NA_checkOneStriding("putNBytes", nindices, scatteredshape, 0, scatteredstrides, bsizes[outi], N, 0))
    return -1;
  scattered = (char *) buffers[outi];

  /* No index is valid for an empty dimension: wrapping would never
  ** terminate and clipping would produce index -1. */
  if (niter > 0)
    for(i=0; i<nindices; i++)
      if (scatteredshape[i] <= 0) {
         PyErr_Format(PyExc_IndexError, "Index out of range");
         return -1;
      }

  switch( cMode )
    {
    case WRAPPED:
      for(i=0; i<niter; i++)
        {
          maybelong j, index;
          for(j=index=0; j<nindices; j++)
            {
              maybelong k = indices[j][i];
              wrap(k, scatteredshape[j]);
              index += scatteredstrides[j]*k;
            }
          memcpy( scattered+index, &gathered[i*N], N);
        }
      break;
    case RAISE:
      for(i=0; i<niter; i++)
        {
          maybelong j, index;
          for(j=index=0; j<nindices; j++)
            {
              maybelong k = indices[j][i];
              if (k < 0)
                k += scatteredshape[j];
              /* Still negative after one wrap-around is out of range too. */
              if (k < 0 || k >= scatteredshape[j]) {
                 PyErr_Format(PyExc_IndexError, "Index out of range");
                 return -1;
              }
              index += scatteredstrides[j]*k;
            }
          memcpy( scattered+index, &gathered[i*N], N);
        }
      break;
    case CLIPPED:
    default:
      for(i=0; i<niter; i++)
        {
          maybelong j, index;
          for(j=index=0; j<nindices; j++)
            {
              maybelong k = indices[j][i];
              if (k < 0)
                k = 0;
              else if (k >= scatteredshape[j])
                k = scatteredshape[j]-1;
              index += scatteredstrides[j]*k;
            }
          memcpy( scattered+index, &gathered[i*N], N);
        }
      break;
    }
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(putNbytes, CFUNC_UFUNC);

/****************** Int8 *******************/

/*******************************************
*                                          *
* These copy data to a contiguous buffer.  *
* They do not handle non-aligned data.     *
* Offsets and Strides are in byte units    *
*                                          *
*******************************************/

/* copy1bytes: strided copy of 1-byte elements using NA_ACOPY1.
**
** dim selects the dimension handled by this call: dimension 0 copies the
** elements, higher dimensions recurse into dim-1 once per index.  Offsets
** and strides are in bytes.  `dummy` is only forwarded to the recursive
** calls.  Always returns 0.
*/
static int copy1bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            copy1bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: one element per step. */
    n = 0;
    while (n < niters[0]) {
        NA_ACOPY1(src, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(copy1bytes, CHECK_ALIGN, 1, 1);

/* align1bytes: thin wrapper that forwards to copyNbytes with an element
** size of 1 byte.  The `dummy` argument is ignored; all other arguments are
** passed through and the result of copyNbytes is returned unchanged. */
static int align1bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
	return copyNbytes(dim, 1, niters, input, inboffset, inbstrides, 
			  output, outboffset, outbstrides);
}

STRIDING_DESCR2(align1bytes, !CHECK_ALIGN, 1, 1);


/******* byteswap *****/

/* byteswap1bytes: strided byte-swapping copy of 1-byte elements.
**
** Each element is first copied into a local scratch buffer with NA_COPY1
** and then written to the output with NA_SWAP1.  dim selects the dimension
** handled by this call: dimension 0 processes the elements, higher
** dimensions recurse into dim-1 once per index.  Offsets and strides are in
** bytes.  `dummy` is only forwarded.  Always returns 0.
*/
static int byteswap1bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            byteswap1bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: stage each element, then emit it swapped. */
    n = 0;
    while (n < niters[0]) {
        char scratch[1];
        NA_COPY1(src, scratch);
        NA_SWAP1(scratch, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(byteswap1bytes, !CHECK_ALIGN, 1, 1);


/* choose1bytes: for every position, copy one 1-byte element from the
** population array picked by the selector into the output.
**
** Buffer layout (ninargs inputs followed by the output):
**   buffers[0]             two maybelong values: clip mode (CLIP_MODE) and
**                          N (read but not otherwise used here)
**   buffers[1]             selector, niter maybelong values
**   buffers[2..ninargs-1]  population arrays, niter elements each
**   buffers[outi]          output, niter elements
**
** Selector values outside [0, ninargs-2) are wrapped, clipped, or reported
** as IndexError depending on the clip mode.
**
** Returns 0 on success (also when there are no population arrays) and -1
** on failure.
*/
static int choose1bytes(long niter, long ninargs, long noutargs,
                         void **buffers, long *bsizes)
{
  maybelong pos, mode, npop, N, *sel;
  char **pop, *out;
  int outi = ninargs + noutargs - 1;

  if (NA_checkIo("choose1bytes", 2, 1, MIN(ninargs,2), noutargs))
     return -1;

  if (NA_checkOneCBuffer("choose1bytes", 2, buffers[0], bsizes[0], sizeof(maybelong)))
     return -1;
  mode = ((maybelong *) buffers[0])[0];
  N    = ((maybelong *) buffers[0])[1];

  if (NA_checkOneCBuffer("choose1bytes", niter, buffers[1], bsizes[1],
     sizeof(maybelong)))
        return -1;
  sel = (maybelong *) buffers[1];

  /* Nothing to choose from. */
  if (ninargs-2 == 0)
    return 0;
  npop = ninargs-2;

  for (pos = 2; pos < ninargs; pos++) {
     if (NA_checkOneCBuffer("choose1bytes", niter,
        buffers[pos], bsizes[pos], 1))
           return -1;
  }
  pop = (char **) &buffers[2];

  if (NA_checkOneCBuffer("choose1bytes", niter,
     buffers[outi], bsizes[outi], 1))
        return -1;
  out = (char *) buffers[outi];

  if (npop == 0)
    return 0;

  for (pos = 0; pos < niter; pos++)
    {
      maybelong j = sel[pos];
      switch (mode)
        {
        case WRAPPED:
          wrap(j, npop);
          break;
        case RAISE:
          if ((j < 0) || (j >= npop)) {
             PyErr_Format(PyExc_IndexError, "Index out of range");
             return -1;
          }
          break;
        case CLIPPED:
        default:
          if (j < 0)
            j = 0;
          else if (j >= npop)
            j = npop-1;
          break;
        }
      NA_ACOPY1(&pop[j][pos*1], &out[pos*1]);
    }
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(choose1bytes, CFUNC_UFUNC);

/****************** Int16 *******************/

/*******************************************
*                                          *
* These copy data to a contiguous buffer.  *
* They do not handle non-aligned data.     *
* Offsets and Strides are in byte units    *
*                                          *
*******************************************/

/* copy2bytes: strided copy of 2-byte elements using NA_ACOPY2.
**
** dim selects the dimension handled by this call: dimension 0 copies the
** elements, higher dimensions recurse into dim-1 once per index.  Offsets
** and strides are in bytes.  `dummy` is only forwarded to the recursive
** calls.  Always returns 0.
*/
static int copy2bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            copy2bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: one element per step. */
    n = 0;
    while (n < niters[0]) {
        NA_ACOPY2(src, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(copy2bytes, CHECK_ALIGN, 2, 2);

/* align2bytes: thin wrapper that forwards to copyNbytes with an element
** size of 2 bytes.  The `dummy` argument is ignored; all other arguments
** are passed through and the result of copyNbytes is returned unchanged. */
static int align2bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
	return copyNbytes(dim, 2, niters, input, inboffset, inbstrides, 
			  output, outboffset, outbstrides);
}

STRIDING_DESCR2(align2bytes, !CHECK_ALIGN, 2, 2);


/******* byteswap *****/

/* byteswap2bytes: strided byte-swapping copy of 2-byte elements.
**
** Each element is first copied into a local scratch buffer with NA_COPY2
** and then written to the output with NA_SWAP2.  dim selects the dimension
** handled by this call: dimension 0 processes the elements, higher
** dimensions recurse into dim-1 once per index.  Offsets and strides are in
** bytes.  `dummy` is only forwarded.  Always returns 0.
*/
static int byteswap2bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            byteswap2bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: stage each element, then emit it swapped. */
    n = 0;
    while (n < niters[0]) {
        char scratch[2];
        NA_COPY2(src, scratch);
        NA_SWAP2(scratch, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(byteswap2bytes, !CHECK_ALIGN, 2, 2);


/* choose2bytes: for every position, copy one 2-byte element from the
** population array picked by the selector into the output.
**
** Buffer layout (ninargs inputs followed by the output):
**   buffers[0]             two maybelong values: clip mode (CLIP_MODE) and
**                          N (read but not otherwise used here)
**   buffers[1]             selector, niter maybelong values
**   buffers[2..ninargs-1]  population arrays, niter elements each
**   buffers[outi]          output, niter elements
**
** Selector values outside [0, ninargs-2) are wrapped, clipped, or reported
** as IndexError depending on the clip mode.
**
** Returns 0 on success (also when there are no population arrays) and -1
** on failure.
*/
static int choose2bytes(long niter, long ninargs, long noutargs,
                         void **buffers, long *bsizes)
{
  maybelong pos, mode, npop, N, *sel;
  char **pop, *out;
  int outi = ninargs + noutargs - 1;

  if (NA_checkIo("choose2bytes", 2, 1, MIN(ninargs,2), noutargs))
     return -1;

  if (NA_checkOneCBuffer("choose2bytes", 2, buffers[0], bsizes[0], sizeof(maybelong)))
     return -1;
  mode = ((maybelong *) buffers[0])[0];
  N    = ((maybelong *) buffers[0])[1];

  if (NA_checkOneCBuffer("choose2bytes", niter, buffers[1], bsizes[1],
     sizeof(maybelong)))
        return -1;
  sel = (maybelong *) buffers[1];

  /* Nothing to choose from. */
  if (ninargs-2 == 0)
    return 0;
  npop = ninargs-2;

  for (pos = 2; pos < ninargs; pos++) {
     if (NA_checkOneCBuffer("choose2bytes", niter,
        buffers[pos], bsizes[pos], 2))
           return -1;
  }
  pop = (char **) &buffers[2];

  if (NA_checkOneCBuffer("choose2bytes", niter,
     buffers[outi], bsizes[outi], 2))
        return -1;
  out = (char *) buffers[outi];

  if (npop == 0)
    return 0;

  for (pos = 0; pos < niter; pos++)
    {
      maybelong j = sel[pos];
      switch (mode)
        {
        case WRAPPED:
          wrap(j, npop);
          break;
        case RAISE:
          if ((j < 0) || (j >= npop)) {
             PyErr_Format(PyExc_IndexError, "Index out of range");
             return -1;
          }
          break;
        case CLIPPED:
        default:
          if (j < 0)
            j = 0;
          else if (j >= npop)
            j = npop-1;
          break;
        }
      NA_ACOPY2(&pop[j][pos*2], &out[pos*2]);
    }
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(choose2bytes, CFUNC_UFUNC);

/****************** Int32 *******************/

/*******************************************
*                                          *
* These copy data to a contiguous buffer.  *
* They do not handle non-aligned data.     *
* Offsets and Strides are in byte units    *
*                                          *
*******************************************/

/* copy4bytes: strided copy of 4-byte elements using NA_ACOPY4.
**
** dim selects the dimension handled by this call: dimension 0 copies the
** elements, higher dimensions recurse into dim-1 once per index.  Offsets
** and strides are in bytes.  `dummy` is only forwarded to the recursive
** calls.  Always returns 0.
*/
static int copy4bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            copy4bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: one element per step. */
    n = 0;
    while (n < niters[0]) {
        NA_ACOPY4(src, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(copy4bytes, CHECK_ALIGN, 4, 4);

/* align4bytes: thin wrapper that forwards to copyNbytes with an element
** size of 4 bytes.  The `dummy` argument is ignored; all other arguments
** are passed through and the result of copyNbytes is returned unchanged. */
static int align4bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
	return copyNbytes(dim, 4, niters, input, inboffset, inbstrides, 
			  output, outboffset, outbstrides);
}

STRIDING_DESCR2(align4bytes, !CHECK_ALIGN, 4, 4);


/******* byteswap *****/

/* byteswap4bytes: strided byte-swapping copy of 4-byte elements.
**
** Each element is first copied into a local scratch buffer with NA_COPY4
** and then written to the output with NA_SWAP4.  dim selects the dimension
** handled by this call: dimension 0 processes the elements, higher
** dimensions recurse into dim-1 once per index.  Offsets and strides are in
** bytes.  `dummy` is only forwarded.  Always returns 0.
*/
static int byteswap4bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            byteswap4bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: stage each element, then emit it swapped. */
    n = 0;
    while (n < niters[0]) {
        char scratch[4];
        NA_COPY4(src, scratch);
        NA_SWAP4(scratch, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(byteswap4bytes, !CHECK_ALIGN, 4, 4);


/* choose4bytes: for every position, copy one 4-byte element from the
** population array picked by the selector into the output.
**
** Buffer layout (ninargs inputs followed by the output):
**   buffers[0]             two maybelong values: clip mode (CLIP_MODE) and
**                          N (read but not otherwise used here)
**   buffers[1]             selector, niter maybelong values
**   buffers[2..ninargs-1]  population arrays, niter elements each
**   buffers[outi]          output, niter elements
**
** Selector values outside [0, ninargs-2) are wrapped, clipped, or reported
** as IndexError depending on the clip mode.
**
** Returns 0 on success (also when there are no population arrays) and -1
** on failure.
*/
static int choose4bytes(long niter, long ninargs, long noutargs,
                         void **buffers, long *bsizes)
{
  maybelong pos, mode, npop, N, *sel;
  char **pop, *out;
  int outi = ninargs + noutargs - 1;

  if (NA_checkIo("choose4bytes", 2, 1, MIN(ninargs,2), noutargs))
     return -1;

  if (NA_checkOneCBuffer("choose4bytes", 2, buffers[0], bsizes[0], sizeof(maybelong)))
     return -1;
  mode = ((maybelong *) buffers[0])[0];
  N    = ((maybelong *) buffers[0])[1];

  if (NA_checkOneCBuffer("choose4bytes", niter, buffers[1], bsizes[1],
     sizeof(maybelong)))
        return -1;
  sel = (maybelong *) buffers[1];

  /* Nothing to choose from. */
  if (ninargs-2 == 0)
    return 0;
  npop = ninargs-2;

  for (pos = 2; pos < ninargs; pos++) {
     if (NA_checkOneCBuffer("choose4bytes", niter,
        buffers[pos], bsizes[pos], 4))
           return -1;
  }
  pop = (char **) &buffers[2];

  if (NA_checkOneCBuffer("choose4bytes", niter,
     buffers[outi], bsizes[outi], 4))
        return -1;
  out = (char *) buffers[outi];

  if (npop == 0)
    return 0;

  for (pos = 0; pos < niter; pos++)
    {
      maybelong j = sel[pos];
      switch (mode)
        {
        case WRAPPED:
          wrap(j, npop);
          break;
        case RAISE:
          if ((j < 0) || (j >= npop)) {
             PyErr_Format(PyExc_IndexError, "Index out of range");
             return -1;
          }
          break;
        case CLIPPED:
        default:
          if (j < 0)
            j = 0;
          else if (j >= npop)
            j = npop-1;
          break;
        }
      NA_ACOPY4(&pop[j][pos*4], &out[pos*4]);
    }
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(choose4bytes, CFUNC_UFUNC);

/****************** Float64 *******************/

/*******************************************
*                                          *
* These copy data to a contiguous buffer.  *
* They do not handle non-aligned data.     *
* Offsets and Strides are in byte units    *
*                                          *
*******************************************/

/* copy8bytes: strided copy of 8-byte elements using NA_ACOPY8.
**
** dim selects the dimension handled by this call: dimension 0 copies the
** elements, higher dimensions recurse into dim-1 once per index.  Offsets
** and strides are in bytes.  `dummy` is only forwarded to the recursive
** calls.  Always returns 0.
*/
static int copy8bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            copy8bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: one element per step. */
    n = 0;
    while (n < niters[0]) {
        NA_ACOPY8(src, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(copy8bytes, CHECK_ALIGN, 8, 8);

/* align8bytes: thin wrapper that forwards to copyNbytes with an element
** size of 8 bytes.  The `dummy` argument is ignored; all other arguments
** are passed through and the result of copyNbytes is returned unchanged. */
static int align8bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
	return copyNbytes(dim, 8, niters, input, inboffset, inbstrides, 
			  output, outboffset, outbstrides);
}

STRIDING_DESCR2(align8bytes, !CHECK_ALIGN, 8, 8);


/******* byteswap *****/

/* byteswap8bytes: strided byte-swapping copy of 8-byte elements.
**
** Each element is first copied into a local scratch buffer with NA_COPY8
** and then written to the output with NA_SWAP8.  dim selects the dimension
** handled by this call: dimension 0 processes the elements, higher
** dimensions recurse into dim-1 once per index.  Offsets and strides are in
** bytes.  `dummy` is only forwarded.  Always returns 0.
*/
static int byteswap8bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            byteswap8bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: stage each element, then emit it swapped. */
    n = 0;
    while (n < niters[0]) {
        char scratch[8];
        NA_COPY8(src, scratch);
        NA_SWAP8(scratch, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(byteswap8bytes, !CHECK_ALIGN, 8, 8);


/* choose8bytes: for every position, copy one 8-byte element from the
** population array picked by the selector into the output.
**
** Buffer layout (ninargs inputs followed by the output):
**   buffers[0]             two maybelong values: clip mode (CLIP_MODE) and
**                          N (read but not otherwise used here)
**   buffers[1]             selector, niter maybelong values
**   buffers[2..ninargs-1]  population arrays, niter elements each
**   buffers[outi]          output, niter elements
**
** Selector values outside [0, ninargs-2) are wrapped, clipped, or reported
** as IndexError depending on the clip mode.
**
** Returns 0 on success (also when there are no population arrays) and -1
** on failure.
*/
static int choose8bytes(long niter, long ninargs, long noutargs,
                         void **buffers, long *bsizes)
{
  maybelong pos, mode, npop, N, *sel;
  char **pop, *out;
  int outi = ninargs + noutargs - 1;

  if (NA_checkIo("choose8bytes", 2, 1, MIN(ninargs,2), noutargs))
     return -1;

  if (NA_checkOneCBuffer("choose8bytes", 2, buffers[0], bsizes[0], sizeof(maybelong)))
     return -1;
  mode = ((maybelong *) buffers[0])[0];
  N    = ((maybelong *) buffers[0])[1];

  if (NA_checkOneCBuffer("choose8bytes", niter, buffers[1], bsizes[1],
     sizeof(maybelong)))
        return -1;
  sel = (maybelong *) buffers[1];

  /* Nothing to choose from. */
  if (ninargs-2 == 0)
    return 0;
  npop = ninargs-2;

  for (pos = 2; pos < ninargs; pos++) {
     if (NA_checkOneCBuffer("choose8bytes", niter,
        buffers[pos], bsizes[pos], 8))
           return -1;
  }
  pop = (char **) &buffers[2];

  if (NA_checkOneCBuffer("choose8bytes", niter,
     buffers[outi], bsizes[outi], 8))
        return -1;
  out = (char *) buffers[outi];

  if (npop == 0)
    return 0;

  for (pos = 0; pos < niter; pos++)
    {
      maybelong j = sel[pos];
      switch (mode)
        {
        case WRAPPED:
          wrap(j, npop);
          break;
        case RAISE:
          if ((j < 0) || (j >= npop)) {
             PyErr_Format(PyExc_IndexError, "Index out of range");
             return -1;
          }
          break;
        case CLIPPED:
        default:
          if (j < 0)
            j = 0;
          else if (j >= npop)
            j = npop-1;
          break;
        }
      NA_ACOPY8(&pop[j][pos*8], &out[pos*8]);
    }
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(choose8bytes, CFUNC_UFUNC);

/****************** Complex64 *******************/

/*******************************************
*                                          *
* These copy data to a contiguous buffer.  *
* They do not handle non-aligned data.     *
* Offsets and Strides are in byte units    *
*                                          *
*******************************************/

/* copy16bytes: strided copy of 16-byte elements using NA_ACOPY16.
**
** dim selects the dimension handled by this call: dimension 0 copies the
** elements, higher dimensions recurse into dim-1 once per index.  Offsets
** and strides are in bytes.  `dummy` is only forwarded to the recursive
** calls.  Always returns 0.
*/
static int copy16bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            copy16bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: one element per step. */
    n = 0;
    while (n < niters[0]) {
        NA_ACOPY16(src, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(copy16bytes, CHECK_ALIGN, 16, 16);

/* align16bytes: thin wrapper that forwards to copyNbytes with an element
** size of 16 bytes.  The `dummy` argument is ignored; all other arguments
** are passed through and the result of copyNbytes is returned unchanged. */
static int align16bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
	return copyNbytes(dim, 16, niters, input, inboffset, inbstrides, 
			  output, outboffset, outbstrides);
}

STRIDING_DESCR2(align16bytes, !CHECK_ALIGN, 16, 16);


/******* byteswap *****/

/* byteswap16bytes: strided byte-swapping copy of 16-byte elements.
**
** Each element is first copied into a local scratch buffer with NA_COPY16
** and then written to the output with NA_SWAP16.  dim selects the dimension
** handled by this call: dimension 0 processes the elements, higher
** dimensions recurse into dim-1 once per index.  Offsets and strides are in
** bytes.  `dummy` is only forwarded.  Always returns 0.
*/
static int byteswap16bytes(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {
    long n;
    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;

    if (dim != 0) {
        /* Outer dimension: recurse with the offsets advanced per index. */
        for (n = 0; n < niters[dim]; n++) {
            byteswap16bytes(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension: stage each element, then emit it swapped. */
    n = 0;
    while (n < niters[0]) {
        char scratch[16];
        NA_COPY16(src, scratch);
        NA_SWAP16(scratch, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
        n++;
    }
    return 0;
}

STRIDING_DESCR2(byteswap16bytes, !CHECK_ALIGN, 16, 16);


/* Element-wise "choose" for 16-byte items.
**
** For each position i in [0, niter) an index j is read from the selector
** buffer, and item i of population buffer j is copied to item i of the
** output buffer.
**
** Buffer layout:
**   buffers[0]                     two maybelong values; the first is the
**                                  out-of-range mode (WRAPPED, CLIPPED or
**                                  RAISE).  The second is not needed here
**                                  because the item size is fixed at 16.
**   buffers[1]                     niter maybelong selector values.
**   buffers[2 .. ninargs-1]        population buffers, niter items each.
**   buffers[ninargs+noutargs-1]    output buffer, niter items.
**
** Out-of-range selector handling:
**   WRAPPED            index is adjusted by the wrap() macro.
**   CLIPPED / other    index is clamped to [0, maxP-1].
**   RAISE              IndexError is set and -1 is returned.
**
** Returns 0 on success (including the case of no population buffers) and
** -1 when an argument check fails or RAISE mode sees a bad index.
*/
static int choose16bytes(long niter, long ninargs, long noutargs,
                         void **buffers, long *bsizes)
{
  maybelong i, cMode, maxP, *selector;
  char **population, *output;
  int outi = ninargs + noutargs - 1;

  if (NA_checkIo("choose16bytes", 2, 1, MIN(ninargs,2), noutargs))
     return -1;

  if (NA_checkOneCBuffer("choose16bytes", 2, buffers[0], bsizes[0], sizeof(maybelong)))
     return -1;
  cMode           =  ((maybelong *) buffers[0])[0];

  if (NA_checkOneCBuffer("choose16bytes", niter, buffers[1], bsizes[1],
     sizeof(maybelong)))
        return -1;
  selector        =  (maybelong *)  buffers[1];

  /* Nothing to choose from when only the two control buffers are given.
  ** Past this point maxP is therefore never zero. */
  if (ninargs-2 == 0)
    return 0;
  maxP = ninargs-2;

  for(i=2; i<ninargs; i++)
     if (NA_checkOneCBuffer("choose16bytes", niter,
        buffers[i], bsizes[i], 16))
           return -1;
  population      =  (char **) &buffers[2];

  if (NA_checkOneCBuffer("choose16bytes", niter,
     buffers[outi], bsizes[outi], 16))
        return -1;
  output          =  (char *)   buffers[outi];

  switch(cMode)
    {
    case WRAPPED:
      for(i=0; i<niter; i++)
        {
          maybelong j = selector[i];
          wrap(j, maxP);
          NA_ACOPY16(&population[j][i*16], &output[i*16]);
        }
      break;
    default:   /* unknown modes are treated as CLIPPED */
    case CLIPPED:
      for(i=0; i<niter; i++)
        {
          maybelong j = selector[i];
          if (j < 0)
            j = 0;
          else if (j >= maxP)
            j = maxP-1;
          NA_ACOPY16(&population[j][i*16], &output[i*16]);
        }
      break;
    case RAISE:
      for(i=0; i<niter; i++)
        {
          maybelong j = selector[i];
          if ((j < 0) || (j >= maxP)) {
             PyErr_Format(PyExc_IndexError, "Index out of range");
             return -1;
          }
          NA_ACOPY16(&population[j][i*16], &output[i*16]);
        }
      break;
    }
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(choose16bytes, CFUNC_UFUNC);

/****************** AnyType *******************/

static int chooseNbytes(long niter, long ninargs, long noutargs,
                         void **buffers, long *bsizes)
{
  maybelong i, cMode, maxP, N, *selector;
  char **population, *output;
  int outi = ninargs + noutargs - 1;

  if (NA_checkIo("chooseNbytes", 2, 1, MIN(ninargs,2), noutargs))
     return -1;

  if (NA_checkOneCBuffer("chooseNbytes", 2, buffers[0], bsizes[0], sizeof(maybelong)))
     return -1;
  else {
     cMode        =  ((maybelong *) buffers[0])[0];
     N            =  ((maybelong *) buffers[0])[1];
  }

  if (NA_checkOneCBuffer("chooseNbytes", niter, buffers[1], bsizes[1],
     sizeof(maybelong)))
        return -1;
  else
     selector     =  (maybelong *)  buffers[1];

  if (ninargs-2 == 0)
    return 0;
  else
    maxP = ninargs-2;
  for(i=2; i<ninargs; i++)
     if (NA_checkOneCBuffer("chooseNbytes", niter,
        buffers[i], bsizes[i], N))
           return -1;
  population      =  (char **) &buffers[2];

  if (NA_checkOneCBuffer("chooseNbytes", niter,
     buffers[outi], bsizes[outi], N))
        return -1;
  else
     output       =  (char *)   buffers[outi];

  if (maxP == 0) 
	  return 0;

  switch(cMode)
    {
    case WRAPPED:
      for(i=0; i<niter; i++)
        {
          maybelong j = selector[i];
	  wrap(j, maxP);
          NA_ACOPYN(&population[j][i*N], &output[i*N]);
        }
      break;
    default:
    case CLIPPED:
      for(i=0; i<niter; i++)
        {
          maybelong j = selector[i];
          if (j < 0)
            j = 0;
          else if (j >= maxP)
            j = maxP-1;
          NA_ACOPYN(&population[j][i*N], &output[i*N]);
        }
      break;
    case RAISE:
      for(i=0; i<niter; i++)
        {
          maybelong j = selector[i];
          if ((j < 0) || (j >= maxP)) {
             PyErr_Format(PyExc_IndexError, "Index out of range");
             return -1;                           
          }
          NA_ACOPYN(&population[j][i*N], &output[i*N]);
        }
      break;
    }
  return 0;
}

SELF_CHECKED_CFUNC_DESCR(chooseNbytes, CFUNC_UFUNC);

/****************** Complex32 *******************/

/******* byteswap *****/

/* Strided byteswap of 8-byte Complex32 items.
**
**   dim           dimension handled by this call.  Dimension 0 is walked
**                 directly; any other value recurses with dim-1 once per
**                 iteration of that dimension.
**   dummy         not used here, only passed on to the recursive calls.
**   niters        iteration count for each dimension.
**   input, inboffset, inbstrides
**                 source base pointer, starting byte offset, and byte
**                 stride for each dimension.
**   output, outboffset, outbstrides
**                 the same three values for the destination.
**
** Always returns 0.
*/
static int byteswapComplex32(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {

    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;
    long n;

    if (dim != 0) {
        /* Outer dimension: shift both offsets by n strides and let the
        ** next lower dimension do the work. */
        for (n = 0; n < niters[dim]; n++) {
            byteswapComplex32(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension.  Each item is copied into a local scratch
    ** buffer with NA_COPY8 and then written to the destination with
    ** NA_COMPLEX_SWAP8. */
    for (n = 0; n < niters[0]; n++) {
        char scratch[8];
        NA_COPY8(src, scratch);
        NA_COMPLEX_SWAP8(scratch, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
    }
    return 0;
}

STRIDING_DESCR2(byteswapComplex32, !CHECK_ALIGN, 8, 8);


/****************** Complex64 *******************/

/******* byteswap *****/

/* Strided byteswap of 16-byte Complex64 items.
**
**   dim           dimension handled by this call.  Dimension 0 is walked
**                 directly; any other value recurses with dim-1 once per
**                 iteration of that dimension.
**   dummy         not used here, only passed on to the recursive calls.
**   niters        iteration count for each dimension.
**   input, inboffset, inbstrides
**                 source base pointer, starting byte offset, and byte
**                 stride for each dimension.
**   output, outboffset, outbstrides
**                 the same three values for the destination.
**
** Always returns 0.
*/
static int byteswapComplex64(long dim, long dummy, maybelong *niters,
            void *input,  long inboffset,  maybelong *inbstrides,
            void *output, long outboffset, maybelong *outbstrides) {

    char *src = (char *) input  + inboffset;
    char *dst = (char *) output + outboffset;
    long n;

    if (dim != 0) {
        /* Outer dimension: shift both offsets by n strides and let the
        ** next lower dimension do the work. */
        for (n = 0; n < niters[dim]; n++) {
            byteswapComplex64(dim-1, dummy, niters,
               input,  inboffset  + n*inbstrides[dim],  inbstrides,
               output, outboffset + n*outbstrides[dim], outbstrides);
        }
        return 0;
    }

    /* Innermost dimension.  Each item is copied into a local scratch
    ** buffer with NA_COPY16 and then written to the destination with
    ** NA_COMPLEX_SWAP16. */
    for (n = 0; n < niters[0]; n++) {
        char scratch[16];
        NA_COPY16(src, scratch);
        NA_COMPLEX_SWAP16(scratch, dst);
        src += inbstrides[0];
        dst += outbstrides[0];
    }
    return 0;
}

STRIDING_DESCR2(byteswapComplex64, !CHECK_ALIGN, 16, 16);


/* Method table handed to Py_InitModule(): the functions callable directly
** from Python.  Each entry is { name, C function, call flags, docstring }.
*/
static PyMethodDef _bytesMethods[] = {
	{ "copyToString", copyToString, METH_VARARGS, NULL },

	{ NULL, NULL, 0, NULL }        /* Sentinel */
};

static PyObject *init_funcDict(void) {
    PyObject *dict;
    dict = PyDict_New();
    NA_add_cfunc(dict, "copy1bytes", (void *) &copy1bytes_descr);
    NA_add_cfunc(dict, "byteswap1bytes", (void *) &byteswap1bytes_descr);
    NA_add_cfunc(dict, "align1bytes", (void *) &align1bytes_descr);
    NA_add_cfunc(dict, "choose1bytes", (void *) &choose1bytes_descr);
    NA_add_cfunc(dict, "copy2bytes", (void *) &copy2bytes_descr);
    NA_add_cfunc(dict, "byteswap2bytes", (void *) &byteswap2bytes_descr);
    NA_add_cfunc(dict, "align2bytes", (void *) &align2bytes_descr);
    NA_add_cfunc(dict, "choose2bytes", (void *) &choose2bytes_descr);
    NA_add_cfunc(dict, "copy4bytes", (void *) &copy4bytes_descr);
    NA_add_cfunc(dict, "byteswap4bytes", (void *) &byteswap4bytes_descr);
    NA_add_cfunc(dict, "align4bytes", (void *) &align4bytes_descr);
    NA_add_cfunc(dict, "choose4bytes", (void *) &choose4bytes_descr);
    NA_add_cfunc(dict, "copy8bytes", (void *) &copy8bytes_descr);
    NA_add_cfunc(dict, "byteswap8bytes", (void *) &byteswap8bytes_descr);
    NA_add_cfunc(dict, "align8bytes", (void *) &align8bytes_descr);
    NA_add_cfunc(dict, "choose8bytes", (void *) &choose8bytes_descr);
    NA_add_cfunc(dict, "copy16bytes", (void *) &copy16bytes_descr);
    NA_add_cfunc(dict, "byteswap16bytes", (void *) &byteswap16bytes_descr);
    NA_add_cfunc(dict, "align16bytes", (void *) &align16bytes_descr);
    NA_add_cfunc(dict, "choose16bytes", (void *) &choose16bytes_descr);
    NA_add_cfunc(dict, "chooseNbytes", (void *) &chooseNbytes_descr);
    NA_add_cfunc(dict, "copyNbytes", (void *) &copyNbytes_descr);
    NA_add_cfunc(dict, "putNbytes", (void *) &putNbytes_descr);
    NA_add_cfunc(dict, "takeNbytes", (void *) &takeNbytes_descr);
    NA_add_cfunc(dict, "byteswapComplex32", (void *) &byteswapComplex32_descr);
    NA_add_cfunc(dict, "byteswapComplex64", (void *) &byteswapComplex64_descr);
    return dict;
}

/* platform independent*/
#ifdef MS_WIN32
__declspec(dllexport)
#endif
void init_bytes(void) {
    PyObject *m, *d, *functions;
    m = Py_InitModule("_bytes", _bytesMethods);
    d = PyModule_GetDict(m);
    import_libnumarray();
    functions = init_funcDict();
    PyDict_SetItemString(d, "functionDict", functions);
    Py_DECREF(functions);
    ADD_VERSION(m);
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */