/*
 *  R : A Computer Language for Statistical Data Analysis
 *  Copyright (C) 1995, 1996  Robert Gentleman and Ross Ihaka
 *  Copyright (C) 1997--2006  Robert Gentleman, Ross Ihaka and the
 *                            R Development Core Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street Fifth Floor, Boston, MA 02110-1301  USA
 */

/* <UTF8> char here is handled as a whole string */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "Defn.h"

static int integerOneIndex(int i, int len) {
    int indx = -1;

    if (i > 0)
	indx = i - 1;
    else if (i == 0 || len < 2)
	error(_("attempt to select less than one element"));
    else if (len == 2 && i > -3)
	indx = 2 + i;
    else
	error(_("attempt to select more than one element"));
    return(indx);
}

int attribute_hidden
OneIndex(SEXP x, SEXP s, int len, int partial, SEXP *newname, int pos)
{
    SEXP names;
    int i, indx, nx;

    if (pos < 0 && length(s) > 1)
	error(_("attempt to select more than one element"));
    if (pos < 0 && length(s) < 1)
	error(_("attempt to select less than one element"));
    if(pos < 0) pos = 0;

    indx = -1;
    *newname = R_NilValue;
    switch(TYPEOF(s)) {
    case LGLSXP:
    case INTSXP:
	indx = integerOneIndex(INTEGER(s)[pos], len);
	break;
    case REALSXP:
	indx = integerOneIndex(REAL(s)[pos], len);
	break;
    case STRSXP:
	nx = length(x);
	names = getAttrib(x, R_NamesSymbol);
	if (names != R_NilValue) {
	    /* Try for exact match */
	    for (i = 0; i < nx; i++)
		if (streql(CHAR(STRING_ELT(names, i)),
			   CHAR(STRING_ELT(s, pos)))) {
		    indx = i;
		    break;
		}
	    /* Try for partial match */
	    if (partial && indx < 0) {
		len = strlen(CHAR(STRING_ELT(s, pos)));
		for(i = 0; i < nx; i++) {
		    if(!strncmp(CHAR(STRING_ELT(names, i)),
				CHAR(STRING_ELT(s, pos)), len)) {
			if(indx == -1 )
			    indx = i;
			else
			    indx = -2;
		    }
		}
	    }
	}
	if (indx == -1)
	    indx = nx;
	*newname = STRING_ELT(s, pos);
	break;
    case SYMSXP:
	nx = length(x);
	names = getAttrib(x, R_NamesSymbol);
	if (names != R_NilValue) {
	    for (i = 0; i < nx; i++)
		if (streql(CHAR(STRING_ELT(names, i)),
			   CHAR(PRINTNAME(s)))) {
		    indx = i;
		    break;
		}
	}
	if (indx == -1)
	    indx = nx;
	*newname = STRING_ELT(s, pos);
	break;
    default:
	error(_("invalid subscript type"));
    }
    return indx;
}

int attribute_hidden
get1index(SEXP s, SEXP names, int len, Rboolean pok, int pos)
{
/* Get a single index for the [[ operator.
   Check that only one index is being selected.
   pok : is "partial ok" ?
*/
    int indx, i;
    double dblind;

    if (pos < 0 && length(s) != 1) {
	if (length(s) > 1)
	    error(_("attempt to select more than one element"));
	else
	    error(_("attempt to select less than one element"));
    } else
	if(pos >= length(s))
	    error(_("internal error in use of recursive indexing"));
    if(pos < 0) pos = 0;
    indx = -1;
    switch (TYPEOF(s)) {
    case LGLSXP:
    case INTSXP:
	i = INTEGER(s)[pos];
	if(i != NA_INTEGER)
	    indx = integerOneIndex(i, len);
	break;
    case REALSXP:
	dblind = REAL(s)[pos];
	if(!ISNAN(dblind))
	    indx = integerOneIndex((int)dblind, len);
	break;
    case STRSXP:
	/* Try for exact match */
	for (i = 0; i < length(names); i++) 
	    if (STRING_ELT(names, i) == NA_STRING || 
		STRING_ELT(s, pos) == NA_STRING) {
		/* NA matches nothing */
	    } else {
		if (streql(CHAR(STRING_ELT(names, i)),
			   CHAR(STRING_ELT(s, pos)))) {
		    indx = i;
		    break;
		}
	    }
	/* Try for partial match */
	if (pok && indx < 0) {
	    len = strlen(CHAR(STRING_ELT(s, pos)));
	    for(i = 0; i < length(names); i++) {
		if (STRING_ELT(names, i) == NA_STRING || 
		    STRING_ELT(s, pos) == NA_STRING) {
		    /* NA matches nothing */
		} else {
		    if(!strncmp(CHAR(STRING_ELT(names, i)),
				CHAR(STRING_ELT(s, pos)), len)) {
			if(indx == -1)/* first one */
			    indx = i;
			else
			    indx = -2;/* more than one partial match */
		    }
		}
	    }
	}
	break;
    case SYMSXP:
	for (i = 0; i < length(names); i++)
	    if (STRING_ELT(names, i) != NA_STRING &&
		streql(CHAR(STRING_ELT(names, i)), CHAR(PRINTNAME(s)))) {
		indx = i;
		break;
	    }
    default:
	error(_("invalid subscript type"));
    }
    return indx;
}

/* Special Matrix Subscripting: Handles the case x[i] where */
/* x is an n-way array and i is a matrix with n columns. */
/* This code returns a vector containing the integer subscripts */
/* to be extracted when x is regarded as unravelled. */
/* Negative indices are not allowed. */
/* A zero anywhere in a row will cause a zero in the same */
/* position in the result. */

SEXP attribute_hidden mat2indsub(SEXP dims, SEXP s)
{
    int tdim, j, i, k, nrs = nrows(s);
    SEXP rvec;

    if (ncols(s) != LENGTH(dims))
	error(_("incorrect number of columns in matrix subscript"));
    PROTECT(rvec = allocVector(INTSXP, nrs));
    s = coerceVector(s, INTSXP);
    setIVector(INTEGER(rvec), nrs, 0);

    for (i = 0; i < nrs; i++) {
	tdim = 1;
	/* compute 0-based subscripts for a row (0 in the input gets -1
	   in the output here) */
	for (j = 0; j < LENGTH(dims); j++) {
	    k = INTEGER(s)[i + j * nrs];
	    if(k == NA_INTEGER) {
		INTEGER(rvec)[i] = NA_INTEGER;
		break;
	    }
	    if(k < 0) error(_("negative values are not allowed in a matrix subscript"));
	    if(k == 0) {
		INTEGER(rvec)[i] = -1;
		break;
	    }
	    if (k > INTEGER(dims)[j])
		error(_("subscript out of bounds"));
	    INTEGER(rvec)[i] += (k - 1) * tdim;
	    tdim *= INTEGER(dims)[j];
	}
	/* transform to 1 based subscripting (0 in the input gets 0 
	   in the output here) */
	if(INTEGER(rvec)[i] != NA_INTEGER)
	    INTEGER(rvec)[i]++;
    }
    UNPROTECT(1);
    return (rvec);
}

static SEXP nullSubscript(int n)
{
    int i;
    SEXP indx;
    indx = allocVector(INTSXP, n);
    for (i = 0; i < n; i++)
	INTEGER(indx)[i] = i + 1;
    return indx;
}

static SEXP logicalSubscript(SEXP s, int ns, int nx, int *stretch)
{
    int canstretch, count, i, nmax;
    SEXP indx;
    canstretch = *stretch;
    if (!canstretch && ns > nx)
	error(_("(subscript) logical subscript too long"));
    nmax = (ns > nx) ? ns : nx;
    *stretch = (ns > nx) ? ns : 0;
    if (ns == 0)
	return(allocVector(INTSXP, 0));
    count = 0;
    for (i = 0; i < nmax; i++)
	if (LOGICAL(s)[i%ns])
	    count++;
    indx = allocVector(INTSXP, count);
    count = 0;
    for (i = 0; i < nmax; i++)
	if (LOGICAL(s)[i%ns]) {
	    if (LOGICAL(s)[i%ns] == NA_LOGICAL)
		INTEGER(indx)[count++] = NA_INTEGER;
	    else
		INTEGER(indx)[count++] = i + 1;
	}
    return indx;
}

static SEXP negativeSubscript(SEXP s, int ns, int nx)
{
    SEXP indx;
    int stretch = 0;
    int i, ix;
    PROTECT(indx = allocVector(LGLSXP, nx));
    for (i = 0; i < nx; i++)
	LOGICAL(indx)[i] = 1;
    for (i = 0; i < ns; i++) {
	ix = INTEGER(s)[i];
	if (ix != 0 && ix != NA_INTEGER && -ix <= nx)
	    LOGICAL(indx)[-ix - 1] = 0;
    }
    s = logicalSubscript(indx, nx, nx, &stretch);
    UNPROTECT(1);
    return s;
}

static SEXP positiveSubscript(SEXP s, int ns, int nx)
{
    SEXP indx;
    int i, zct = 0;
    for (i = 0; i < ns; i++) {
	if (INTEGER(s)[i] == 0)
	    zct++;
    }
    if (zct) {
	indx = allocVector(INTSXP, (ns - zct));
	for (i = 0, zct = 0; i < ns; i++)
	    if (INTEGER(s)[i] != 0)
		INTEGER(indx)[zct++] = INTEGER(s)[i];
	return indx;
    }
    else
	return s;
}

static SEXP integerSubscript(SEXP s, int ns, int nx, int *stretch)
{
    int i, ii, min, max, canstretch;
    Rboolean isna = FALSE;
    canstretch = *stretch;
    *stretch = 0;
    min = 0;
    max = 0;
    for (i = 0; i < ns; i++) {
	ii = INTEGER(s)[i];
	if (ii != NA_INTEGER) {
	    if (ii < min)
		min = ii;
	    if (ii > max)
		max = ii;
	} else isna = TRUE;
    }
    if (min < -nx)
	error(_("subscript out of bounds"));
    if (max > nx) {
	if(canstretch) *stretch = max;
	else error(_("subscript out of bounds"));
    }
    if (min < 0) {
	if (max == 0 && !isna) return negativeSubscript(s, ns, nx);
	else error(_("only 0's may be mixed with negative subscripts"));
    }
    else return positiveSubscript(s, ns, nx);
    return R_NilValue;
}

typedef SEXP (*StringEltGetter)(SEXP x, int i);

/* This uses a couple of horrible hacks in conjunction with
 * VectorAssign (in subassign.c).  If subscripting is used for
 * assignment, it is possible to extend a vector by supplying new
 * names, and we want to give the extended vector those names, so they
 * are returned as the attribute. Also, unset elements of the vector
 * of new names (places where a match was found) are indicated by
 * setting the element of the newnames vector to NULL, even though it
 * is a character vector.
*/

/* The original code (pre 2.0.0) used a ns x nx loop that was too
 * slow.  So now we hash.  Hashing is expensive on memory (up to 32nx
 * bytes) so it is only worth doing if ns * nx is large.  If nx is
 * large, then it will be too slow unless ns is very small.
 */

#define USE_HASHING 1
static SEXP stringSubscript(SEXP s, int ns, int nx, SEXP names,
			    StringEltGetter strg, int *stretch, Rboolean in)
{
    SEXP indx, indexnames;
    int i, j, nnames, sub, extra;
    int canstretch = *stretch;
#ifdef USE_HASHING
    Rboolean usehashing = in && (ns * nx > 1000);
#else
    Rboolean usehashing = FALSE;
#endif

    PROTECT(s);
    PROTECT(names);
    PROTECT(indexnames = allocVector(STRSXP, ns));
    nnames = nx;
    extra = nnames;

    /* Process each of the subscripts. First we compare with the names
     * on the vector and then (if there is no match) with each of the
     * previous subscripts, since (if assigning) we may have already
     * added an element of that name. (If we are not assigning, any
     * nonmatch will have given an error.)
     */

#ifdef USE_HASHING
    if(usehashing) {
	/* must be internal, so names contains a character vector */
	/* NB: this does not behave in the same way with respect to ""
	   and NA names: they will match */
	PROTECT(indx = match(names, s, 0));
	/* second pass to correct this */
	for (i = 0; i < ns; i++)
	    if(STRING_ELT(s, i) == NA_STRING || !CHAR(STRING_ELT(s, i))[0])
		INTEGER(indx)[i] = 0;
	for (i = 0; i < ns; i++) SET_STRING_ELT(indexnames, i, R_NilValue);
    } else {
#endif
	PROTECT(indx = allocVector(INTSXP, ns));
	for (i = 0; i < ns; i++) {
	    sub = 0;
	    if (names != R_NilValue) {
		for (j = 0; j < nnames; j++) {
		    SEXP names_j = strg(names, j);
		    if (!in && TYPEOF(names_j) != CHARSXP)
			error(_("character vector element does not have type CHARSXP"));
		    if (NonNullStringMatch(STRING_ELT(s, i), names_j)) {
			sub = j + 1;
			SET_STRING_ELT(indexnames, i, R_NilValue);
			break;
		    }
		}
	    }
	    INTEGER(indx)[i] = sub;
	}
#ifdef USE_HASHING
    }
#endif

    for (i = 0; i < ns; i++) {
	sub = INTEGER(indx)[i];
	if (sub == 0) {
	    for (j = 0 ; j < i ; j++)
		if (NonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) {
		    sub = INTEGER(indx)[j];
		    SET_STRING_ELT(indexnames, i, STRING_ELT(s, j));
		    break;
		}
	}
	if (sub == 0) {
	    if (!canstretch)
		error(_("subscript out of bounds"));
	    extra += 1;
	    sub = extra;
	    SET_STRING_ELT(indexnames, i, STRING_ELT(s, i));
	}
	INTEGER(indx)[i] = sub;
    }
    /* We return the new names as the names attribute of the returned
       subscript vector. */
    if (extra != nnames)
	setAttrib(indx, R_NamesSymbol, indexnames);
    if (canstretch)
	*stretch = extra;
    UNPROTECT(4);
    return indx;
}

/* Array Subscripts.
    dim is the dimension (0 to k-1)
    s is the subscript list,
    dims is the dimensions of x
    dng is a function (usually getAttrib) that obtains the dimnames
    x is the array to be subscripted.
*/

typedef SEXP AttrGetter(SEXP x, SEXP data);

static SEXP 
int_arraySubscript(int dim, SEXP s, SEXP dims, AttrGetter dng,
		   StringEltGetter strg, SEXP x, Rboolean in)
{
    int nd, ns, stretch = 0;
    SEXP dnames, tmp;
    ns = length(s);
    nd = INTEGER(dims)[dim];

    switch (TYPEOF(s)) {
    case NILSXP:
	return allocVector(INTSXP, 0);
    case LGLSXP:
	return logicalSubscript(s, ns, nd, &stretch);
    case INTSXP:
	return integerSubscript(s, ns, nd, &stretch);
    case REALSXP:
    	PROTECT(tmp = coerceVector(s, INTSXP));
	tmp = integerSubscript(tmp, ns, nd, &stretch);
    	UNPROTECT(1);
	return tmp;
    case STRSXP:
	dnames = dng(x, R_DimNamesSymbol);
	if (dnames == R_NilValue)
	    error(_("no 'dimnames' attribute for array"));
	dnames = VECTOR_ELT(dnames, dim);
	return stringSubscript(s, ns, nd, dnames, strg, &stretch, in);
    case SYMSXP:
	if (s == R_MissingArg)
	    return nullSubscript(nd);
    default:
	error(_("invalid subscript"));
    }
    return R_NilValue;
}

/* This is used by packages arules and cba. Seems dangerous as the 
   typedef is not exported */
SEXP
arraySubscript(int dim, SEXP s, SEXP dims, AttrGetter dng,
	       StringEltGetter strg, SEXP x)
{
    return int_arraySubscript(dim, s, dims, dng, strg, x, TRUE);
}

/* Subscript creation.  The first thing we do is check to see */
/* if there are any user supplied NULL's, these result in */
/* returning a vector of length 0. */
/* if stretch is zero on entry then the vector x cannot be
   "stretched",
   otherwise, stretch returns the new required length for x
*/

SEXP attribute_hidden makeSubscript(SEXP x, SEXP s, int *stretch)
{
    int nx;
    SEXP ans;

    ans = R_NilValue;
    if (isVector(x) || isList(x) || isLanguage(x)) {
	nx = length(x);

	ans = vectorSubscript(nx, s, stretch, getAttrib, (STRING_ELT), x);
    }
    else error(_("subscripting on non-vector"));
    return ans;

}

/* nx is the length of the object being subscripted,
   s is the R subscript value,
   dng gets a given attrib for x, which is the object we are
   subsetting,
*/

static SEXP 
int_vectorSubscript(int nx, SEXP s, int *stretch, AttrGetter dng,
		    StringEltGetter strg, SEXP x, Rboolean in)
{
    int ns;
    SEXP ans = R_NilValue, tmp;

    ns = length(s);
    /* special case for simple indices -- does not duplicate */
    if (ns == 1 && TYPEOF(s) == INTSXP && ATTRIB(s) == R_NilValue) {
	int i = INTEGER(s)[0];
	if (0 < i && i <= nx) {
	    *stretch = 0;
	    return s;
	}
    }
    PROTECT(s = duplicate(s));
    SET_ATTRIB(s, R_NilValue);
    SET_OBJECT(s, 0);
    switch (TYPEOF(s)) {
    case NILSXP:
	*stretch = 0;
	ans = allocVector(INTSXP, 0);
	break;
    case LGLSXP:
	/* *stretch = 0; */
	ans = logicalSubscript(s, ns, nx, stretch);
	break;
    case INTSXP:
	    ans = integerSubscript(s, ns, nx, stretch);
	    break;
    case REALSXP:
	PROTECT(tmp = coerceVector(s, INTSXP));
	ans = integerSubscript(tmp, ns, nx, stretch);
	UNPROTECT(1);
	break;
    case STRSXP:
    {
	SEXP names = dng(x, R_NamesSymbol);
	/* *stretch = 0; */
	ans = stringSubscript(s, ns, nx, names, strg, stretch, in);
    }
    break;
    case SYMSXP:
	*stretch = 0;
	if (s == R_MissingArg) {
	    ans = nullSubscript(nx);
	    break;
	}
    default:
	error(_("invalid subscript type"));
    }
    UNPROTECT(1);
    return ans;
}


SEXP attribute_hidden
vectorSubscript(int nx, SEXP s, int *stretch, AttrGetter dng,
		StringEltGetter strg, SEXP x)
{
    return int_vectorSubscript(nx, s, stretch, dng, strg, x, TRUE);
}



syntax highlighted by Code2HTML, v. 0.9.1