# $Id: strings.py,v 1.56 2006/01/04 18:33:58 jaytmiller Exp $
#
"""
Large chararray test
>>> xx=array(None,itemsize=3,shape=220000)
>>> import cPickle
>>> c=cPickle.loads(cPickle.dumps(fromlist(["this","that","something else"])))
>>> c
CharArray(['this', 'that', 'something else'])
>>> c._type
CharArrayType(14)
>>> a=fromlist(["this"]*25); a.shape=(5,5); a[ range(2,4) ]
CharArray([['this', 'this', 'this', 'this', 'this'],
['this', 'this', 'this', 'this', 'this']])
>>> a[ range(2,4) ] = fromlist(["that"]); a
CharArray([['this', 'this', 'this', 'this', 'this'],
['this', 'this', 'this', 'this', 'this'],
['that', 'that', 'that', 'that', 'that'],
['that', 'that', 'that', 'that', 'that'],
['this', 'this', 'this', 'this', 'this']])
>>> array([], shape=(0,1,2))
CharArray([])
>>> a = _gen.concatenate([array(["1"]*3), array(["2"]*3)]); a
CharArray(['1', '1', '1', '2', '2', '2'])
>>> _gen.reshape(a, (2,3))
CharArray([['1', '1', '1'],
['2', '2', '2']])
>>> CharArray(buffer="thatthis", shape=(2,), itemsize=4,
... bytestride=-4, byteoffset=4)
CharArray(['this', 'that'])
"""
from copy import _EmptyClass
import memory
import _bytes
import _chararray
import numarrayall as _na
import generic as _gen
import sys
import string
import types
import os
import re
import arrayprint
import warnings
import numerictypes as _nt
import numinclude as _numinclude
import _ndarray as _nda
_globals, _locals = globals, locals
class CharArrayType:
def __init__(self, itemsize):
self.itemsize = itemsize
self.name = "CharArrayType(%d)" % (self.itemsize,)
def __repr__(self):
return self.name
CharArrayTypeCache = {}
def NewCharArrayType(itemsize):
"""NewCharArrayType creates CharArrayTypes on demand, but checks to see
if they already exist in the cache first. This makes type equivalence
the same as object identity.
"""
if itemsize not in CharArrayTypeCache.keys():
CharArrayTypeCache[itemsize] = CharArrayType(itemsize)
return CharArrayTypeCache[itemsize]
class PrecisionWarning(UserWarning):
pass
class RawCharArray(_na.NDArray):
"""RawCharArray(buffer=None, shape=None, byteoffset=0, bytestride=None)
RawCharArray is a fixed length array of characters based on
ndarray.NDArray with no automatic stripping or padding.
itemsize specifies the length of all strings in the array.
"""
_is_raw = 1
def __init__(self, buffer=None, itemsize=None, shape=None, byteoffset=0,
bytestride=None, aligned=1, type=None, padc=" "):
if isinstance(shape, types.IntType):
shape = (shape,)
if type is not None:
if itemsize is not None:
raise ValueError("Specify type *or* itemsize, not both.")
itemsize = type.itemsize
if not (padc, str) or len(padc) <> 1:
raise ValueError("padc must be a string of length 1.")
if buffer is None:
if shape is None or itemsize is None:
raise ValueError("Must define both shape & itemsize if buffer is None")
else:
if shape is None and itemsize is None:
raise ValueError("Must specify shape, itemsize, or both.")
ni = _bufferItems( buffer, byteoffset, bytestride, itemsize)
if shape and itemsize == None:
itemsize = ni/_na.product(shape)
if itemsize and shape == None:
shape = (ni,)
if itemsize == 0: # Another hack for 0 length strings.
bytestride=0
if not _nda.is_buffer(buffer) and buffer is not None:
raise TypeError("buffer must either support the C buffer protocol or return something that does from its __buffer__() method");
_na.NDArray.__init__(self, shape=shape, itemsize=itemsize,
buffer=buffer, byteoffset=byteoffset,
bytestride=bytestride, aligned=aligned)
self._flags |= _gen._UPDATEDICT
if type is None:
type = NewCharArrayType(itemsize)
self._type = type
self._padc = padc
if buffer is None:
self.fill(" ")
def __get_array_typestr__(self):
return '|S%d' % self._itemsize
__array_typestr__ = property(__get_array_typestr__, None, "")
def tolist(self):
"""returns array as a (nested) list of strings."""
if len(self._shape) == 1:
if self._shape[0] > 0:
return [ x for x in self ]
else:
return []
else:
return [ ni.tolist() for ni in self ]
def __getstate__(self):
"""returns RawCharArray state dictionary for pickling"""
state = _na.NDArray.__getstate__(self)
state["_type"] = self._type.itemsize
return state
def __setstate__(self, state):
"""sets RawCharArray dictionary from pickled state"""
_na.NDArray.__setstate__(self, state)
self._type = NewCharArrayType(state["_type"])
def isbyteswapped(self):
"""CharArray's are *never* byteswapped. returns 0."""
return 0
def isaligned(self):
"""CharArray's are *always* aligned. returns 1."""
return 1
def sinfo(self):
"returns string describing a CharArray"
s = _na.NDArray.sinfo(self)
s += "type: " + repr(self._type) + "\n"
return s
def _getitem(self, offset):
"""_getitem(self, offset) returns the "stripped" fixed length
string from 'self' at 'offset'.
"""
if isinstance(self._data, memory.MemoryType):
s = buffer(self._data)[offset:offset+self._itemsize]
return self.strip(s)
else:
return self.strip(str(self._data[offset:offset+self._itemsize]))
def _setitem(self, offset, value):
"""_setitem(self, offset) sets 'offset' to result of "padding" 'value'.
_setitem silently truncates inputs which are too long.
>>> s=array([""])
>>> s[0] = "this"
>>> s
CharArray([''])
>>> s=array(["this","that"])
>>> s[0] = "othe"
>>> s
CharArray(['othe', 'that'])
>>> s[1] = "v"
>>> s
CharArray(['othe', 'v'])
>>> s = array("")
>>> s[0] = "this"
>>> s
CharArray([''])
"""
bo = offset
where = memory.writeable_buffer(self._data)
where[bo:bo+self._itemsize] = self.pad(value)[0:self._itemsize]
def _byteView(self):
"""_byteView(self) returns a view of self as an array of bytes.
A _byteView cannot be taken from a chararray with itemsize==0.
"""
if self._itemsize == 0:
raise ValueError("_byteView doesn't work for zero length items.")
b = _na.NumArray(buffer=self._data,
shape=self._shape+(self._itemsize,),
type=_na.UInt8,
byteoffset=self._byteoffset,
bytestride=self._bytestride)
b._strides = self._strides + (1,)
return b;
def _copyFrom(self, arr):
"""
>>> c = array([""])
>>> c[:] = array(["this"])
>>> c
CharArray([''])
>>> c = array(["this","that"])
>>> c[:] = array(["a really long line","a"])
>>> c
CharArray(['a re', 'a'])
>>> c[:] = ["more","money"]
>>> c
CharArray(['more', 'mone'])
>>> c[:] = array(["x"])
>>> c
CharArray(['x', 'x'])
"""
if self._itemsize == 0:
return
# Convert lists and strings to chararray.
arr = asarray(arr, itemsize=self._itemsize,
padc=self._padc, kind=self.__class__)
# Ensure shapes match.
arr = self._broadcast( arr )
if arr._itemsize == 0: return
# Get views of both source and destination as UInt8 numarray.
it = arr._byteView()
me = self._byteView()
if self._itemsize <= arr._itemsize:
me[:] = it[..., :self._itemsize]
else:
me[...,:it._shape[-1]] = it
# zero fill the part of subarr *not* covered by arr
me[...,it._shape[-1]:] = 0
def copy(self):
"""Return a new array with the same shape and type, but a copy
of the data
>>> c = fromlist(["this","that", "another"])
>>> d = c.copy()
>>> d
CharArray(['this', 'that', 'another'])
>>> int(c._data is d._data)
0
"""
arr = self.view()
arr._data = memory.new_memory(arr._itemsize * arr.nelements())
arr._byteoffset = 0
arr._bytestride = arr._itemsize
arr._strides = arr._stridesFromShape()
arr._itemsize = self._itemsize
if _na.product(self._shape):
copyfunction = _bytes.functionDict["copyNbytes"]
copyfunction(arr._shape, self._data, self._byteoffset,
self._strides, arr._data, 0, arr._strides,
arr._itemsize)
return arr
def substringView(self, i, j):
"""substringView returns modified view of the input array which
represents only the [i:j] substring of each array element.
>>> c = fromlist([["this","that"],["another", "one"]])
>>> d = c.substringView(1, 2); d
CharArray([['h', 'h'],
['n', 'n']])
>>> d[:] = [["1","2"],["3","4"]]; d
CharArray([['1', '2'],
['3', '4']])
>>> c
CharArray([['t1is', 't2at'],
['a3other', 'o4e']])
"""
n = _na.arange(self._itemsize)[i:j]
if len(n) != 0:
i = n[0]
j = n[-1]+1
else:
i = j = 0
r = self.view()
substr_size = j-i
if substr_size < 0:
substr_size = 0
r._itemsize = substr_size
r._byteoffset += i
return r
def _broadcast(self, other):
return _na.NDArray._broadcast(self, other)
def _dualbroadcast(self, other):
s, o = _na.NDArray._dualbroadcast(self, other)
if not _na.product(s._strides):
s = s.copy()
if not _na.product(o._strides):
o = o.copy()
return s, o
def concatenate(self, other):
"""concatenate(self, other) concatenates two numarray element by element
>>> array(["this", "that", "another"]).stripAll() + "."
CharArray(['this.', 'that.', 'another.'])
>>> array([""])+array([""])
CharArray([''])
"""
a = asarray(other, padc=self._padc, kind=self.__class__)
self, a = self._dualbroadcast(a)
result = array(buffer=None, shape=self._shape,
itemsize=self._itemsize+a._itemsize,
padc=self._padc, kind=self.__class__)
if a is other: # since stripAll() mutates the array
a = a.copy()
_chararray.Concat(self.__class__ is RawCharArray,
self, a.stripAll(), result)
return result.padAll()
def __add__(self, other):
"""
>>> map(str, range(3)) + array(["this","that","another one"])
CharArray(['0this', '1that', '2another one'])
>>> "" + array(["this", "that"])
CharArray(['this', 'that'])
>>> "prefix with trailing whitespace " + array(["."])
CharArray(['prefix with trailing whitespace .'])
>>> "" + array("")
CharArray([''])
>>> array(["this", "that", "another one"], kind=RawCharArray) + map(str, range(3))
RawCharArray(['this 0', 'that 1', 'another one2'])
"""
return self.concatenate(other)
def __radd__(self, other):
return asarray(other, padc=self._padc,
kind=self.__class__).concatenate(self)
def __iadd__(self, other):
self[:] = self.concatenate(other)
return self
def strip(self, value):
return value
def pad(self, value):
return value
def stripAll(self):
return self
def padAll(self):
return self
def _format(self, x):
"""_format() formats a single array element for str() or repr()"""
return repr(self.strip(x))
def __cmp__(self, other):
s, t = str(self), str(other)
return cmp(s,t)
def fill(self, char):
"""fill(self, char) fills the array entirely with 'char'.
>>> x=array([""])
>>> x.fill(' ')
>>> x
CharArray([''])
>>> x=array(["this"])
>>> x.fill("x")
>>> x
CharArray(['xxxx'])
"""
if self._itemsize and self.nelements():
if self.rank > 0:
self[:] = char*self._itemsize
else:
self[()] = char*self._itemsize
def raw(self):
"""raw(self) returns a raw view of self.
>>> c=fromlist(["this","that","another"])
>>> c.raw()
RawCharArray(['this ', 'that ', 'another'])
"""
arr = self.view()
arr.__class__ = RawCharArray # "Anchor" on RawCharArray.
return arr
def contiguous(self):
"""contiguous(self) returns a version of self which is guaranteed to
be contiguous. If self is contiguous already, it returns self.
Otherwise, it returns a copy of self.
"""
if self.iscontiguous():
return self
else:
return self.copy()
def resized(self, n, fill='\0'):
"""resized(self, n) returns a copy of self, resized so that each
item is of length n characters. Extra characters are filled with
the value of 'fill'. If self._itemsize == n, self is returned.
>>> c = fromlist(["this","that","another"])
>>> c._itemsize
7
>>> d=c.resized(20)
>>> d
CharArray(['this', 'that', 'another'])
>>> d._itemsize
20
"""
if self._itemsize != n:
ext = self.__class__(shape=self._shape, itemsize=n)
ext.fill(fill)
ext[:] = self
return ext
else:
return self
# First half of comparison operators, slow version.
def _StrCmp(self, mode, raw, other0):
"""StrCmp(self, other0) calls strncmp on corresponding items of the
two numarray, self and other.
"""
if not isinstance(other0, self.__class__):
other = asarray(other0, padc=self._padc, kind=self.__class__)
else:
other = other0
if self._shape != other._shape:
self, other = self._dualbroadcast(other)
if self._itemsize < other._itemsize:
self = self.resized(other._itemsize)
elif other._itemsize < self._itemsize:
other = other.resized(self._itemsize)
if self is None:
raise ValueError("Incompatible array dimensions")
return _chararray.StrCmp(self, mode, raw, other)
# rich comparisons (only works in Python 2.1 and later)
def __eq__(self, other):
"""
>>> array(["this ", "thar", "other"]).__eq__(array(["this", "that", "another"]))
array([1, 0, 0], type=Bool)
>>> array([""]).__eq__(array([""]))
array([1], type=Bool)
>>> array([""]).__eq__(array(["x"]))
array([0], type=Bool)
>>> array(["x"]).__eq__(array([""]))
array([0], type=Bool)
>>> a = array(["this"]*10)
>>> a == a # compare to self
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], type=Bool)
"""
return _chararray.StrCmp(self, 0, self._is_raw, other)
def __ne__(self, other):
"""
>>> s=array(["this ", "thar", "other"])
>>> t=array(["this", "that", "another"])
>>> s.__ne__(t)
array([0, 1, 1], type=Bool)
"""
return _chararray.StrCmp(self, 1, self._is_raw, other)
def __lt__(self, other):
"""
>>> s=array(["this ", "thar", "other"])
>>> t=array(["this", "that", "another"])
>>> s.__lt__(t)
array([0, 1, 0], type=Bool)
"""
return _chararray.StrCmp(self, 2, self._is_raw, other)
def __gt__(self, other):
"""
>>> s=array(["this ", "thar", "other"])
>>> t=array(["this", "that", "another"])
>>> s.__gt__(t)
array([0, 0, 1], type=Bool)
"""
return _chararray.StrCmp(self, 3, self._is_raw, other)
def __le__(self, other):
"""
>>> s=array(["this ", "thar", "other"])
>>> t=array(["this", "that", "another"])
>>> s.__le__(t)
array([1, 1, 0], type=Bool)
"""
return _chararray.StrCmp(self, 4, self._is_raw, other)
def __ge__(self, other):
"""
>>> s=array(["this ", "thar", "other"])
>>> t=array(["this", "that", "another"])
>>> s.__ge__(t)
array([1, 0, 1], type=Bool)
"""
return _chararray.StrCmp(self, 5, self._is_raw, other)
if sys.version_info >= (2,1,0):
def _test_rich_comparisons():
"""
>>> s=array(["this ", "thar", "other"])
>>> t=array(["this", "that", "another"])
>>> s == t
array([1, 0, 0], type=Bool)
>>> s < t
array([0, 1, 0], type=Bool)
>>> s >= t
array([1, 0, 1], type=Bool)
>>> s <= t
array([1, 1, 0], type=Bool)
>>> s > t
array([0, 0, 1], type=Bool)
"""
pass
def __contains__(self, str):
"""
Returns 1 if-and-only-if 'self' has an element == to 'str'
>>> s=array(["this ", "thar", "other"])
>>> int("this" in s)
1
>>> int("tjt" in s)
0
>>> x=array([""])
>>> int("this" in x)
0
>>> int("" in x)
1
"""
return _na.logical_or.reduce(_na.ravel(self.__eq__(str)))
def sort(self):
"""
>>> a=fromlist(["other","this","that","another"])
>>> a.sort()
>>> a
CharArray(['another', 'other', 'that', 'this'])
"""
l = self.tolist()
l.sort()
self[:] = fromlist(l)
def argsort(self, axis=-1):
"""
>>> a=fromlist(["other","that","this","another"])
>>> a.argsort()
array([3, 0, 1, 2])
"""
if axis != -1:
raise TypeError("CharArray.argsort() does not support the axis parameter.")
ax = range(len(self))
ax.sort(lambda x,y,z=self: cmp(z[x],z[y]))
return _na.array(ax)
def amap(self, f):
"""amap() returns the nested list which results from applying
function 'f' to each element of 'self'.
"""
if len(self._shape) == 1:
return [f(i) for i in self]
else:
ans = []
for i in self:
ans.append(i.amap(f))
return ans
def match(self, pattern, flags=0):
"""
>>> a=fromlist([["wo","what"],["wen","erewh"]])
>>> a.match("wh[aebd]")
(array([0]), array([1]))
>>> a.match("none")
(array([], type=Long), array([], type=Long))
>>> b=array([""])
>>> b.match("this")
(array([], type=Long),)
>>> b.match("")
(array([0]),)
"""
matcher = re.compile(pattern, flags).match
l = lambda x, f=matcher: int(f(x) is not None)
matches = _na.array(self.amap(l), type=_na.Bool)
if len(matches):
return _na.nonzero(matches)
else:
return ()
def search(self, pattern, flags=0):
"""
>>> a=fromlist([["wo","what"],["wen","erewh"]])
>>> a.search("wh")
(array([0, 1]), array([1, 1]))
>>> a.search("1")
(array([], type=Long), array([], type=Long))
>>> b=array(["",""])
>>> b.search("1")
(array([], type=Long),)
>>> b.search("")
(array([0, 1]),)
"""
searcher = re.compile(pattern, flags).search
l = lambda x, f=searcher: int(f(x) is not None)
matches = _na.array(self.amap(l), type=_na.Bool)
if len(matches):
return _na.nonzero(matches)
else:
return ()
def grep(self, pattern, flags=0):
"""
>>> a=fromlist([["who","what"],["when","where"]])
>>> a.grep("whe")
CharArray(['when', 'where'])
"""
return _gen.take(self, self.match(pattern, flags), axis=(0,))
def sub(self, pattern, replacement, flags=0, count=0):
"""
>>> a=fromlist([["who","what"],["when","where"]])
>>> a.sub("wh", "ph")
>>> a
CharArray([['pho', 'phat'],
['phen', 'phere']])
"""
cpat = re.compile(pattern, flags)
l = lambda x, p=cpat, r=replacement, c=count: re.sub(p, r, x, c)
self[:] = fromlist( self.amap(l) )
def eval(self):
"""eval(self) converts CharArray 'self' into a NumArray.
This is the original slow implementation based on a Python loop
and the eval() function.
>>> array([["1","2"],["3","4"]]).eval()
array([[1, 2],
[3, 4]])
>>> try:
... array([["1","2"],["3","other"]]).eval()
... except NameError:
... pass
"""
n = _na.array([ eval(x,{},{}) for x in _na.ravel(self)])
n.setshape(self._shape)
return n
def fasteval(self, type=_na.Float64):
"""fasteval(self, type=Float64) converts CharArray 'self' into
a NumArray of the specified type. fasteval() can't convert
complex arrays at all, and loses precision when converting
UInt64 or Int64.
>>> array([["1","2"],["3","4"]]).fasteval().astype('Long')
array([[1, 2],
[3, 4]])
>>> try:
... array([["1","2"],["3","other"]]).fasteval()
... except _chararray.error:
... pass
"""
n = _na.array(shape=self._shape, type=_na.Float64)
type = _nt.getType(type)
_chararray.Eval((), self, n);
if type != _na.Float64:
if ((type is _na.Int64) or
(_numinclude.hasUInt64 and type is _na.UInt64)):
warnings.warn("Loss of precision converting to 64-bit type. Consider using eval().", PrecisionWarning)
return n.astype(type)
else:
return n
class CharArray(RawCharArray):
"""
>>> array("thisthatthe othe",shape=(4,),itemsize=4)
CharArray(['this', 'that', 'the', 'othe'])
>>> array("thisthatthe othe",shape=(4,),itemsize=4)._shape
(4,)
>>> array("thisthatthe othe",shape=(4,),itemsize=4)._itemsize
4
>>> array([["this","that"],["x","y"]])
CharArray([['this', 'that'],
['x', 'y']])
>>> array([["this","that"],["x","y"]])._shape
(2, 2)
>>> array([["this","that"],["x","y"]])._itemsize
4
>>> s=array([["this","that"],["x","y"]], itemsize=10)
>>> s
CharArray([['this', 'that'],
['x', 'y']])
>>> s._itemsize
10
>>> s[0][0]
'this'
>>> # s[1,1][0] = 'z' # Char assigment doesn't work!
>>> s[1,1] = 'z' # But padding may do what you want.
>>> s # Otherwise, specify all of s[1,1] or subclass.
CharArray([['this', 'that'],
['x', 'z']])
"""
_is_raw = 0
def resized(self, n, fill=' '):
"""Same as RawCharArray.resized() but fills with blanks rather than
NUL."""
return RawCharArray.resized(self, n, fill)
def pad(self, value):
"""
pad(self, value) implements CharArray's string-filling policy
which is used when strings are assigned to elements of a CharArray.
Pad extends 'value' to length self._itemsize using spaces.
"""
return _chararray.Pad(value, self._itemsize, ord(self._padc))
def strip(self, value):
"""
strip(self, value) implements CharArray's string fetching
"cleanup" policy. strip truncates 'value' at the first NULL
and removes all trailing whitespace from the remainder. For
compatability with FITS, leading whitespace is never
completely stripped: a string beginning with a space always
returns at least one space to distinguish it from the empty
string.
"""
return _chararray.Strip(value)
def stripAll(self):
"""
stripAll(self) applies the chararray strip function to each element
of self and returns the result. The result may be a new array.
"""
_chararray.StripAll(None, self)
return self
def padAll(self):
"""
padAll(self) applies the chararray pad function to each element
of self and returns the result. The result may be a new array.
"""
_chararray.PadAll(self._padc, self)
return self
def toUpper(self):
"""toUpper(self) converts all elements of self to upper case
>>> a = fromlist(["That","this","another"])
>>> a.toUpper()
>>> a
CharArray(['THAT', 'THIS', 'ANOTHER'])
"""
_chararray.ToUpper(None, self)
def toLower(self):
"""toLower(self) converts all elements of self to upper case
>>> a = fromlist(["THAT","this","anOther"])
>>> a.toLower()
>>> a
CharArray(['that', 'this', 'another'])
"""
_chararray.ToLower(None, self)
def maxLen(self):
"""
maxLen(self) computes the length of the longest string in
self. Maxlen will applies the strip function to each element
prior to computing its length.
>>> array(["this ","that"]).maxLen()
4
"""
n = _na.NumArray(buffer=None, shape=self.shape, type=_na.Int32)
_chararray.StrLen(None, self, n)
return n.max()
def truncated(self):
"""
truncate(self) returns a new array with the smallest possible itemsize
which will hold the stripped contents of self.
>>> array(["this ","that"])._itemsize
6
>>> array(["this ","that"]).truncated()._itemsize
4
"""
return self.resized(self.maxLen())
def count(self, s):
"""count(self, s) counts the number of occurences of string 's'.
>>> int(array(["this","that","another","this"]).count("this"))
2
"""
return self.__eq__(s).sum('Int64')
def index(self, s):
"""index(self, s) returns the index of the first occurenced of
's' in 'self'.
>>> array([["this","that","another"],
... ["another","this","that"]]).index("another")
(0, 2)
>>> array([["this","that","another"],
... ["another","this","that"]]).index("not here")
Traceback (most recent call last):
ValueError: string 'not here' not in array
"""
indices = _na.nonzero(self.__eq__(s))
if len(indices[0]):
first = map(lambda x: x[0], indices)
return tuple(first)
else:
raise ValueError("string " + `s` +" not in array")
def isString(s):
return isinstance(s, str)
def isPySequence(s):
return hasattr(s, '__getitem__') and hasattr(s,'__len__')
def _slistShape0(slist):
"""_slistShape0(slist) computes the (shape, itemsize) tuple
of the nested string list 'slist'.
itemsize is set to the maximum of all string lengths in slist.
>>> s=["this","that","the other"]
>>> _slistShape(s)
((3,), 9)
>>> _slistShape((s,s,s,s))
((4, 3), 9)
>>> _slistShape(["this", ["that","other"]])
Traceback (most recent call last):
...
ValueError: Nested sequences with different lengths.
"""
if isinstance(slist, str):
return ((), len(slist),)
elif len(slist) == 0:
return ((0,), 0)
else:
shape, itemsize = _slistShape0(slist[0])
for s in slist[1:]:
if isinstance(s, str):
itemsize = max(itemsize, len(s))
newshape = ()
else:
newshape, newitemsize = _slistShape0(s)
itemsize = max(itemsize, newitemsize)
if newshape != shape:
raise ValueError("Nested sequences with different lengths.")
return ((len(slist),)+ shape, itemsize)
def _slistShape(slist, itemsize=None, shape=None):
"""_slistShape(slist, itemsize=None, shape=None) computes the "natural"
shape and itemsize of slist, and combines this with the specified
itemsize and shape, checking for consistency.
Specifying an itemsize overrides the slist's natural itemsize.
>>> _slistShape(["this","that"], itemsize=10)
((2,), 10)
>>> _slistShape(["this","that"], itemsize=3)
((2,), 3)
Specifying a shape checks for consistency against the slist's shape.
>>> _slistShape(["this","that"], shape=(2,1,1))
((2, 1, 1), 4)
>>> _slistShape(["this","that"], shape=(3,2))
Traceback (most recent call last):
...
ValueError: Inconsistent list and shape
"""
shape_items = _slistShape0(slist)
if shape is None:
shape = shape_items[0]
else:
if _gen.product(shape) != _gen.product(shape_items[0]):
raise ValueError("Inconsistent list and shape")
if itemsize is None:
itemsize = shape_items[-1]
else:
pass # specified itemsize => padded extension or silent truncation.
return (shape, itemsize)
def _pad(slist, n, c=" "):
"""_pad(slist, n, c=' ') pads each member of string list 'slist' with
fill character 'c' to a total length of 'n' characters and returns
the concatenated results.
strings longer than n are *truncated*.
>>>
>>> _pad(["this","that","the other"],9," ")
'this that the other'
"""
if isinstance(slist, str):
if n > len(slist):
return slist + c*(n-len(slist))
else:
return slist[:n]
else:
result = []
for s in slist:
if isinstance(s, str):
if n > len(s):
t = s + c*(n-len(s))
else:
t = s[:n]
else:
t = _pad(s, n, c)
result.append(t)
return "".join(result)
def fromlist(slist, itemsize=None, shape=None, padc=" ", kind=CharArray):
"""fromlist(slist, padc=" ") creates a CharArray from a multi-dimensional
list of strings, 'slist', padding each string to the length of the
longest string with character 'padc'.
>>> s=fromlist([["this","that"],["x","y"]])
>>> s
CharArray([['this', 'that'],
['x', 'y']])
>>> s[0][0]
'this'
>>> s[1][1]
'y'
>>> s[1][1] = "whom"
>>> s[1][1]
'whom'
>>> fromlist(['this', 'that'], itemsize=2)
CharArray(['th', 'th'])
>>> fromlist(['t','u'], itemsize=3)
CharArray(['t', 'u'])
>>> fromlist(['t','u'], itemsize=3)._itemsize
3
"""
slist = list(slist) # convert tuples
shape, itemsize = _slistShape(slist, itemsize=itemsize, shape=shape)
s = _pad(slist, itemsize, padc) # compute padded concatenation of slist
return fromstring(s, shape=shape, itemsize=itemsize, padc=padc, kind=kind)
def _stringToBuffer(datastring):
"""_stringToBuffer(datastring) allocates a buffer, copies datastring into
it, and returns the buffer.
"""
abuff = memory.new_memory( len(datastring) )
memory.writeable_buffer(abuff)[:] = datastring
return abuff
def fromstring(s, itemsize=None, shape=None, padc=" ", kind=CharArray):
"""Create an array from binary data contained in a string (by copying)
>>> fromstring('thisthat', itemsize=4)
CharArray(['this', 'that'])
>>> fromstring('thisthat', shape=2)
CharArray(['this', 'that'])
>>> fromstring('this is a test', shape=(1,))
CharArray(['this is a test'])
"""
if isinstance(shape, types.IntType):
shape = (shape,)
if ((shape in [None, (1,)])
and (itemsize is not None
and itemsize > len(s))):
s = _pad(s, itemsize, padc)
if shape is None and not itemsize:
shape = (1,)
itemsize = len(s)
return kind(_stringToBuffer(s), shape=shape, itemsize=itemsize, padc=padc)
def fromfile(file, itemsize=None, shape=None, padc=" ", kind=CharArray):
"""Create an array from binary file data
If file is a string then that file is opened, else it is assumed
to be a file object. No options at the moment, all file positioning
must be done prior to this function call with a file object
>>> import testdata
>>> s=fromfile(testdata.filename, shape=-1, itemsize=80)
>>> s[0]
'SIMPLE = T / file does conform to FITS standard'
>>> s._shape
(108,)
>>> s._itemsize
80
"""
if isinstance(shape, types.IntType):
shape = (shape,)
name = 0
if isString(file):
name = 1
file = open(file, 'rb')
size = int(os.path.getsize(file.name) - file.tell())
if not shape and not itemsize:
shape = (1,)
itemsize = size
elif shape is not None:
if itemsize is not None:
shapesize = _na.product(shape)*itemsize
if shapesize < 0:
shape = list(shape)
shape[ shape.index(-1) ] = size / -shapesize
shape = tuple(shape)
else:
shapesize=_na.product(shape)
if shapesize < 0:
raise ValueError("Shape dimension of -1 requires itemsize.")
itemsize = size / shapesize
elif itemsize:
shape = (size/itemsize,)
else:
raise ValueError("Must define shape or itemsize.")
nbytes = _na.product(shape)*itemsize
if nbytes > size:
raise ValueError(
"Not enough bytes left in file for specified shape and type")
# create the array
arr = kind(None, shape=shape, itemsize=itemsize, padc=padc)
nbytesread = file.readinto(arr._data)
if nbytesread != nbytes:
raise IOError("Didn't read as many bytes as expected")
if name:
file.close()
return arr
def array(buffer=None, itemsize=None, shape=None, byteoffset=0,
bytestride=None, padc=" ", kind=CharArray):
"""array(buffer=None, itemsize=None, shape=None) creates a new instance
of a CharArray.
buffer specifies the source of the array's initialization data.
type(buffer) is in [ None, CharArray, StringType,
ListType, FileType, BufferType].
itemsize specifies the fixed maximum length of the array's strings.
shape specifies the array dimensions.
>>> array(None,itemsize=3,shape=(1,))
CharArray([' '])
>>> array(buffer("abcdsedxxxxxncxn"), itemsize=2, byteoffset=4, bytestride=8)
CharArray(['se', 'nc'])
>>> array("abcd", itemsize=2)
CharArray(['ab', 'cd'])
>>> array(['this', 'that'], itemsize=10)
CharArray(['this', 'that'])
>>> array(['this', 'that'], itemsize=10)._itemsize
10
>>> array(array(['this', 'that'], itemsize=10))
CharArray(['this', 'that'])
>>> import testdata
>>> array(open(testdata.filename,"r"),itemsize=80, shape=2)
CharArray(['SIMPLE = T / file does conform to FITS standard',
'BITPIX = 16 / number of bits per data pixel'])
"""
if isinstance(shape, types.IntType):
shape = (shape,)
if buffer is None or _na.SuitableBuffer(buffer):
return kind(buffer, itemsize=itemsize, shape=shape,
byteoffset=byteoffset, bytestride=bytestride,
padc=padc)
if byteoffset or bytestride is not None:
raise ValueError('Offset and stride can only be specified if "buffer" is a buffer or None')
if isString(buffer):
return fromstring(buffer, itemsize=itemsize, shape=shape,
padc=padc, kind=kind)
elif isPySequence(buffer):
return fromlist(buffer, itemsize=itemsize, shape=shape,
padc=padc, kind=kind)
elif isinstance(buffer, kind) and buffer.__class__ is kind:
return buffer.copy()
elif isinstance(buffer, RawCharArray):
return kind(buffer=buffer._data,
itemsize=itemsize or buffer._itemsize,
shape=shape or buffer._shape,
padc=padc)
elif isinstance(buffer, types.FileType):
return fromfile(buffer, itemsize=itemsize, shape=shape,
padc=padc, kind=kind)
else:
raise TypeError("Don't know how to handle that kind of buffer")
def asarray(buffer=None, itemsize=None, shape=None, byteoffset=0,
bytestride=None, padc=" ", kind=CharArray):
"""massages a sequence into a chararray.
If buffer is *already* a chararray of the appropriate kind, it is
returned unaltered.
"""
if isinstance(buffer, kind) and buffer.__class__ is kind:
return buffer
else:
return array(buffer, itemsize, shape, byteoffset, bytestride,
padc, kind)
inputarray = asarray # obosolete synonym
def take(array, indices, outarr=None, axis=0, clipmode=_na.RAISE):
a = asarray(array)
return _gen.take(a, indices, outarr, axis, clipmode)
take.__doc__ = _gen.take.__doc__
def put(array, indices, values, axis=0, clipmode=_na.RAISE):
a = asarray(array)
v = asarray(values)
return _gen.put(a, indices, v, axis, clipmode)
put.__doc__ = _gen.put.__doc__
def _bufferItems(buffer, offset=0, bytestride=None, itemsize=None):
"""
>>> _bufferItems(buffer("0123456789"), offset=2, bytestride=4, itemsize=1)
2
>>> _bufferItems(buffer("0123456789"), offset=1, bytestride=2, itemsize=1)
5
>>> _bufferItems(buffer("0123456789"), bytestride=5, itemsize=2)
2
>>> _bufferItems(buffer("0123456789"), bytestride=None, itemsize=2)
5
>>> _bufferItems(buffer("0123456789"), offset=3)
7
>>> _bufferItems(buffer("abcdsedxxxxxncxn"),itemsize=2,offset=4,bytestride=8)
2
>>> cc=CharArray(buffer('abcdef'*5),shape=(2,),itemsize=2,byteoffset=8,bytestride=20,aligned=0)
>>> cc
CharArray(['cd', 'ef'])
"""
if bytestride is None:
if itemsize is None:
return len(buffer) - offset
else:
if itemsize:
return (len(buffer)-offset)/itemsize
else:
return 1 # Hack to permit 0 length strings
else:
if itemsize is None:
raise ValueError("Must specify itemsize if bytestride is specified.")
strides = (len(buffer)-offset)*1.0/bytestride
istrides = int(strides)
fstrides = int((strides-istrides)*bytestride+0.5)
return istrides + (fstrides >= itemsize)
def num2char(n, format, itemsize=32):
"""num2char formats NumArray 'num' into a CharArray using 'format'
>>> num2char(_na.arange(0.0,5), '%2.2f')
CharArray(['0.00', '1.00', '2.00', '3.00', '4.00'])
>>> num2char(_na.arange(0.0,5), '%d')
CharArray(['0', '1', '2', '3', '4'])
>>> num2char(_na.arange(5), "%02d")
CharArray(['00', '01', '02', '03', '04'])
Limitations:
1. When formatted values are too large to fit into strings of
length itemsize, the values are truncated, possibly losing
significant information.
2. Complex numbers are not supported.
"""
n = _na.asarray(n)
if isinstance(n.type(), _nt.ComplexType):
raise NotImplementedError("num2char doesn't support complex types yet.")
if n.type() == _na.Float64:
wnum = n
else:
wnum = n.astype(_na.Float64)
char = CharArray(shape=n.getshape(), itemsize=itemsize)
_chararray.Format(format, wnum, char)
return char
def _nothing(*args): return 0 # Test everything... nothing private.
def test():
if sys.version_info < (2,4):
import doctest, strings
return doctest.testmod(strings, isprivate=_nothing)
else:
import numarray.numtest as nt, strings
t = nt.Tester(globs=globals())
t.rundoc(strings)
return t.summarize()
if __name__ == "__main__":
test()
syntax highlighted by Code2HTML, v. 0.9.1