/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka * Copyright (C) 1997--2006 Robert Gentleman, Ross Ihaka and the * R Development Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street Fifth Floor, Boston, MA 02110-1301 USA */ /* char here is handled as a whole string */ #ifdef HAVE_CONFIG_H #include #endif #include "Defn.h" static int integerOneIndex(int i, int len) { int indx = -1; if (i > 0) indx = i - 1; else if (i == 0 || len < 2) error(_("attempt to select less than one element")); else if (len == 2 && i > -3) indx = 2 + i; else error(_("attempt to select more than one element")); return(indx); } int attribute_hidden OneIndex(SEXP x, SEXP s, int len, int partial, SEXP *newname, int pos) { SEXP names; int i, indx, nx; if (pos < 0 && length(s) > 1) error(_("attempt to select more than one element")); if (pos < 0 && length(s) < 1) error(_("attempt to select less than one element")); if(pos < 0) pos = 0; indx = -1; *newname = R_NilValue; switch(TYPEOF(s)) { case LGLSXP: case INTSXP: indx = integerOneIndex(INTEGER(s)[pos], len); break; case REALSXP: indx = integerOneIndex(REAL(s)[pos], len); break; case STRSXP: nx = length(x); names = getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { /* Try for exact match */ for (i = 0; i < nx; i++) if (streql(CHAR(STRING_ELT(names, i)), CHAR(STRING_ELT(s, pos)))) { indx = i; break; } /* Try for partial match */ if (partial && indx < 0) { len = strlen(CHAR(STRING_ELT(s, pos))); for(i = 0; i < nx; i++) { if(!strncmp(CHAR(STRING_ELT(names, i)), CHAR(STRING_ELT(s, pos)), len)) { if(indx == -1 ) indx = i; else indx = -2; } } } } if (indx == -1) indx = nx; *newname = STRING_ELT(s, pos); break; case SYMSXP: nx = length(x); names = getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { for (i = 0; i < nx; i++) if (streql(CHAR(STRING_ELT(names, i)), CHAR(PRINTNAME(s)))) { indx = i; break; } } if (indx == -1) indx = nx; *newname = STRING_ELT(s, pos); break; default: error(_("invalid subscript type")); } return indx; } int attribute_hidden get1index(SEXP s, SEXP names, int len, Rboolean pok, int pos) { /* Get a single index for the [[ operator. Check that only one index is being selected. pok : is "partial ok" ? */ int indx, i; double dblind; if (pos < 0 && length(s) != 1) { if (length(s) > 1) error(_("attempt to select more than one element")); else error(_("attempt to select less than one element")); } else if(pos >= length(s)) error(_("internal error in use of recursive indexing")); if(pos < 0) pos = 0; indx = -1; switch (TYPEOF(s)) { case LGLSXP: case INTSXP: i = INTEGER(s)[pos]; if(i != NA_INTEGER) indx = integerOneIndex(i, len); break; case REALSXP: dblind = REAL(s)[pos]; if(!ISNAN(dblind)) indx = integerOneIndex((int)dblind, len); break; case STRSXP: /* Try for exact match */ for (i = 0; i < length(names); i++) if (STRING_ELT(names, i) == NA_STRING || STRING_ELT(s, pos) == NA_STRING) { /* NA matches nothing */ } else { if (streql(CHAR(STRING_ELT(names, i)), CHAR(STRING_ELT(s, pos)))) { indx = i; break; } } /* Try for partial match */ if (pok && indx < 0) { len = strlen(CHAR(STRING_ELT(s, pos))); for(i = 0; i < length(names); i++) { if (STRING_ELT(names, i) == NA_STRING || STRING_ELT(s, pos) == NA_STRING) { /* NA matches nothing */ } else { if(!strncmp(CHAR(STRING_ELT(names, i)), CHAR(STRING_ELT(s, pos)), len)) { if(indx == -1)/* first one */ indx = i; else indx = -2;/* more than one partial match */ } } } } break; case SYMSXP: for (i = 0; i < length(names); i++) if (STRING_ELT(names, i) != NA_STRING && streql(CHAR(STRING_ELT(names, i)), CHAR(PRINTNAME(s)))) { indx = i; break; } default: error(_("invalid subscript type")); } return indx; } /* Special Matrix Subscripting: Handles the case x[i] where */ /* x is an n-way array and i is a matrix with n columns. */ /* This code returns a vector containing the integer subscripts */ /* to be extracted when x is regarded as unravelled. */ /* Negative indices are not allowed. */ /* A zero anywhere in a row will cause a zero in the same */ /* position in the result. */ SEXP attribute_hidden mat2indsub(SEXP dims, SEXP s) { int tdim, j, i, k, nrs = nrows(s); SEXP rvec; if (ncols(s) != LENGTH(dims)) error(_("incorrect number of columns in matrix subscript")); PROTECT(rvec = allocVector(INTSXP, nrs)); s = coerceVector(s, INTSXP); setIVector(INTEGER(rvec), nrs, 0); for (i = 0; i < nrs; i++) { tdim = 1; /* compute 0-based subscripts for a row (0 in the input gets -1 in the output here) */ for (j = 0; j < LENGTH(dims); j++) { k = INTEGER(s)[i + j * nrs]; if(k == NA_INTEGER) { INTEGER(rvec)[i] = NA_INTEGER; break; } if(k < 0) error(_("negative values are not allowed in a matrix subscript")); if(k == 0) { INTEGER(rvec)[i] = -1; break; } if (k > INTEGER(dims)[j]) error(_("subscript out of bounds")); INTEGER(rvec)[i] += (k - 1) * tdim; tdim *= INTEGER(dims)[j]; } /* transform to 1 based subscripting (0 in the input gets 0 in the output here) */ if(INTEGER(rvec)[i] != NA_INTEGER) INTEGER(rvec)[i]++; } UNPROTECT(1); return (rvec); } static SEXP nullSubscript(int n) { int i; SEXP indx; indx = allocVector(INTSXP, n); for (i = 0; i < n; i++) INTEGER(indx)[i] = i + 1; return indx; } static SEXP logicalSubscript(SEXP s, int ns, int nx, int *stretch) { int canstretch, count, i, nmax; SEXP indx; canstretch = *stretch; if (!canstretch && ns > nx) error(_("(subscript) logical subscript too long")); nmax = (ns > nx) ? ns : nx; *stretch = (ns > nx) ? ns : 0; if (ns == 0) return(allocVector(INTSXP, 0)); count = 0; for (i = 0; i < nmax; i++) if (LOGICAL(s)[i%ns]) count++; indx = allocVector(INTSXP, count); count = 0; for (i = 0; i < nmax; i++) if (LOGICAL(s)[i%ns]) { if (LOGICAL(s)[i%ns] == NA_LOGICAL) INTEGER(indx)[count++] = NA_INTEGER; else INTEGER(indx)[count++] = i + 1; } return indx; } static SEXP negativeSubscript(SEXP s, int ns, int nx) { SEXP indx; int stretch = 0; int i, ix; PROTECT(indx = allocVector(LGLSXP, nx)); for (i = 0; i < nx; i++) LOGICAL(indx)[i] = 1; for (i = 0; i < ns; i++) { ix = INTEGER(s)[i]; if (ix != 0 && ix != NA_INTEGER && -ix <= nx) LOGICAL(indx)[-ix - 1] = 0; } s = logicalSubscript(indx, nx, nx, &stretch); UNPROTECT(1); return s; } static SEXP positiveSubscript(SEXP s, int ns, int nx) { SEXP indx; int i, zct = 0; for (i = 0; i < ns; i++) { if (INTEGER(s)[i] == 0) zct++; } if (zct) { indx = allocVector(INTSXP, (ns - zct)); for (i = 0, zct = 0; i < ns; i++) if (INTEGER(s)[i] != 0) INTEGER(indx)[zct++] = INTEGER(s)[i]; return indx; } else return s; } static SEXP integerSubscript(SEXP s, int ns, int nx, int *stretch) { int i, ii, min, max, canstretch; Rboolean isna = FALSE; canstretch = *stretch; *stretch = 0; min = 0; max = 0; for (i = 0; i < ns; i++) { ii = INTEGER(s)[i]; if (ii != NA_INTEGER) { if (ii < min) min = ii; if (ii > max) max = ii; } else isna = TRUE; } if (min < -nx) error(_("subscript out of bounds")); if (max > nx) { if(canstretch) *stretch = max; else error(_("subscript out of bounds")); } if (min < 0) { if (max == 0 && !isna) return negativeSubscript(s, ns, nx); else error(_("only 0's may be mixed with negative subscripts")); } else return positiveSubscript(s, ns, nx); return R_NilValue; } typedef SEXP (*StringEltGetter)(SEXP x, int i); /* This uses a couple of horrible hacks in conjunction with * VectorAssign (in subassign.c). If subscripting is used for * assignment, it is possible to extend a vector by supplying new * names, and we want to give the extended vector those names, so they * are returned as the attribute. Also, unset elements of the vector * of new names (places where a match was found) are indicated by * setting the element of the newnames vector to NULL, even though it * is a character vector. */ /* The original code (pre 2.0.0) used a ns x nx loop that was too * slow. So now we hash. Hashing is expensive on memory (up to 32nx * bytes) so it is only worth doing if ns * nx is large. If nx is * large, then it will be too slow unless ns is very small. */ #define USE_HASHING 1 static SEXP stringSubscript(SEXP s, int ns, int nx, SEXP names, StringEltGetter strg, int *stretch, Rboolean in) { SEXP indx, indexnames; int i, j, nnames, sub, extra; int canstretch = *stretch; #ifdef USE_HASHING Rboolean usehashing = in && (ns * nx > 1000); #else Rboolean usehashing = FALSE; #endif PROTECT(s); PROTECT(names); PROTECT(indexnames = allocVector(STRSXP, ns)); nnames = nx; extra = nnames; /* Process each of the subscripts. First we compare with the names * on the vector and then (if there is no match) with each of the * previous subscripts, since (if assigning) we may have already * added an element of that name. (If we are not assigning, any * nonmatch will have given an error.) */ #ifdef USE_HASHING if(usehashing) { /* must be internal, so names contains a character vector */ /* NB: this does not behave in the same way with respect to "" and NA names: they will match */ PROTECT(indx = match(names, s, 0)); /* second pass to correct this */ for (i = 0; i < ns; i++) if(STRING_ELT(s, i) == NA_STRING || !CHAR(STRING_ELT(s, i))[0]) INTEGER(indx)[i] = 0; for (i = 0; i < ns; i++) SET_STRING_ELT(indexnames, i, R_NilValue); } else { #endif PROTECT(indx = allocVector(INTSXP, ns)); for (i = 0; i < ns; i++) { sub = 0; if (names != R_NilValue) { for (j = 0; j < nnames; j++) { SEXP names_j = strg(names, j); if (!in && TYPEOF(names_j) != CHARSXP) error(_("character vector element does not have type CHARSXP")); if (NonNullStringMatch(STRING_ELT(s, i), names_j)) { sub = j + 1; SET_STRING_ELT(indexnames, i, R_NilValue); break; } } } INTEGER(indx)[i] = sub; } #ifdef USE_HASHING } #endif for (i = 0; i < ns; i++) { sub = INTEGER(indx)[i]; if (sub == 0) { for (j = 0 ; j < i ; j++) if (NonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) { sub = INTEGER(indx)[j]; SET_STRING_ELT(indexnames, i, STRING_ELT(s, j)); break; } } if (sub == 0) { if (!canstretch) error(_("subscript out of bounds")); extra += 1; sub = extra; SET_STRING_ELT(indexnames, i, STRING_ELT(s, i)); } INTEGER(indx)[i] = sub; } /* We return the new names as the names attribute of the returned subscript vector. */ if (extra != nnames) setAttrib(indx, R_NamesSymbol, indexnames); if (canstretch) *stretch = extra; UNPROTECT(4); return indx; } /* Array Subscripts. dim is the dimension (0 to k-1) s is the subscript list, dims is the dimensions of x dng is a function (usually getAttrib) that obtains the dimnames x is the array to be subscripted. */ typedef SEXP AttrGetter(SEXP x, SEXP data); static SEXP int_arraySubscript(int dim, SEXP s, SEXP dims, AttrGetter dng, StringEltGetter strg, SEXP x, Rboolean in) { int nd, ns, stretch = 0; SEXP dnames, tmp; ns = length(s); nd = INTEGER(dims)[dim]; switch (TYPEOF(s)) { case NILSXP: return allocVector(INTSXP, 0); case LGLSXP: return logicalSubscript(s, ns, nd, &stretch); case INTSXP: return integerSubscript(s, ns, nd, &stretch); case REALSXP: PROTECT(tmp = coerceVector(s, INTSXP)); tmp = integerSubscript(tmp, ns, nd, &stretch); UNPROTECT(1); return tmp; case STRSXP: dnames = dng(x, R_DimNamesSymbol); if (dnames == R_NilValue) error(_("no 'dimnames' attribute for array")); dnames = VECTOR_ELT(dnames, dim); return stringSubscript(s, ns, nd, dnames, strg, &stretch, in); case SYMSXP: if (s == R_MissingArg) return nullSubscript(nd); default: error(_("invalid subscript")); } return R_NilValue; } /* This is used by packages arules and cba. Seems dangerous as the typedef is not exported */ SEXP arraySubscript(int dim, SEXP s, SEXP dims, AttrGetter dng, StringEltGetter strg, SEXP x) { return int_arraySubscript(dim, s, dims, dng, strg, x, TRUE); } /* Subscript creation. The first thing we do is check to see */ /* if there are any user supplied NULL's, these result in */ /* returning a vector of length 0. */ /* if stretch is zero on entry then the vector x cannot be "stretched", otherwise, stretch returns the new required length for x */ SEXP attribute_hidden makeSubscript(SEXP x, SEXP s, int *stretch) { int nx; SEXP ans; ans = R_NilValue; if (isVector(x) || isList(x) || isLanguage(x)) { nx = length(x); ans = vectorSubscript(nx, s, stretch, getAttrib, (STRING_ELT), x); } else error(_("subscripting on non-vector")); return ans; } /* nx is the length of the object being subscripted, s is the R subscript value, dng gets a given attrib for x, which is the object we are subsetting, */ static SEXP int_vectorSubscript(int nx, SEXP s, int *stretch, AttrGetter dng, StringEltGetter strg, SEXP x, Rboolean in) { int ns; SEXP ans = R_NilValue, tmp; ns = length(s); /* special case for simple indices -- does not duplicate */ if (ns == 1 && TYPEOF(s) == INTSXP && ATTRIB(s) == R_NilValue) { int i = INTEGER(s)[0]; if (0 < i && i <= nx) { *stretch = 0; return s; } } PROTECT(s = duplicate(s)); SET_ATTRIB(s, R_NilValue); SET_OBJECT(s, 0); switch (TYPEOF(s)) { case NILSXP: *stretch = 0; ans = allocVector(INTSXP, 0); break; case LGLSXP: /* *stretch = 0; */ ans = logicalSubscript(s, ns, nx, stretch); break; case INTSXP: ans = integerSubscript(s, ns, nx, stretch); break; case REALSXP: PROTECT(tmp = coerceVector(s, INTSXP)); ans = integerSubscript(tmp, ns, nx, stretch); UNPROTECT(1); break; case STRSXP: { SEXP names = dng(x, R_NamesSymbol); /* *stretch = 0; */ ans = stringSubscript(s, ns, nx, names, strg, stretch, in); } break; case SYMSXP: *stretch = 0; if (s == R_MissingArg) { ans = nullSubscript(nx); break; } default: error(_("invalid subscript type")); } UNPROTECT(1); return ans; } SEXP attribute_hidden vectorSubscript(int nx, SEXP s, int *stretch, AttrGetter dng, StringEltGetter strg, SEXP x) { return int_vectorSubscript(nx, s, stretch, dng, strg, x, TRUE); }