// -*- c-basic-offset: 4; related-file-name: "../include/click/string.hh" -*-
/*
* string.{cc,hh} -- a String class with shared substrings
* Eddie Kohler
*
* Copyright (c) 1999-2000 Massachusetts Institute of Technology
* Copyright (c) 2004-2005 Regents of the University of California
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, subject to the conditions
* listed in the Click LICENSE file. These conditions include: you must
* preserve this copyright notice, and you cannot mention the copyright
* holders in advertising related to the Software without their permission.
* The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
* notice is a summary of the Click LICENSE file; the license in that file is
* legally binding.
*/
#include <click/config.h>
#include <click/string.hh>
#include <click/straccum.hh>
#include <click/glue.hh>
CLICK_DECLS
/** @class String
* @brief A string of characters.
*
* The String class represents a string of characters. Strings may be
* constructed from C strings, characters, numbers, and so forth. They may
* also be added together. The underlying character arrays are dynamically
* allocated; String operations allocate and free memory as needed. A String
* and its substrings generally share memory. Accessing a character by index
* takes O(1) time; so does creating a substring.
*
* <h3>Initialization</h3>
*
* The String implementation must be explicitly initialized before use; see
* static_initialize(). Explicit initialization is used because static
* constructors and other automatic initialization tricks don't work in the
* kernel. However, at user level, you can declare a String::Initializer
* object to initialize the library.
*
* <h3>Out-of-memory strings</h3>
*
* When there is not enough memory to create a particular string, a special
* "out-of-memory" string is returned instead. Out-of-memory strings are
* contagious: the result of any concatenation operation involving an
* out-of-memory string is another out-of-memory string. Thus, the final
* result of a series of String operations will be an out-of-memory string,
* even if the out-of-memory condition occurs in the middle.
*
* Out-of-memory strings have zero characters, but they aren't equal to other
* empty strings. If @a s is a normal String (even an empty string), and @a
* oom is an out-of-memory string, then @a s @< @a oom.
*
* All out-of-memory strings are equal and share the same data(), which is
* different from the data() of any other string. See
* String::out_of_memory_data(). The String::out_of_memory_string() function
* returns an out-of-memory string.
*/
String::Memo *String::null_memo = 0;
String::Memo *String::permanent_memo = 0;
String::Memo *String::oom_memo = 0;
String *String::null_string_p = 0;
String *String::oom_string_p = 0;
const char String::oom_string_data = 0;
/** @cond never */
inline
String::Memo::Memo()
: _refcount(0), _capacity(0), _dirty(0), _real_data("")
{
}
inline
String::Memo::Memo(char *data, int dirty, int capacity)
: _refcount(0), _capacity(capacity), _dirty(dirty),
_real_data(data)
{
}
String::Memo::Memo(int dirty, int capacity)
: _refcount(1), _capacity(capacity), _dirty(dirty),
_real_data(new char[capacity])
{
assert(_capacity >= _dirty);
}
String::Memo::~Memo()
{
if (_capacity) {
assert(_capacity >= _dirty);
delete[] _real_data;
}
}
/** @endcond never */
/** @brief Create a String containing the ASCII base-10 representation of @a i.
*/
String::String(int i)
{
char buf[128];
sprintf(buf, "%d", i);
assign(buf, -1);
}
/** @brief Create a String containing the ASCII base-10 representation of @a u.
*/
String::String(unsigned u)
{
char buf[128];
sprintf(buf, "%u", u);
assign(buf, -1);
}
/** @brief Create a String containing the ASCII base-10 representation of @a i.
*/
String::String(long i)
{
char buf[128];
sprintf(buf, "%ld", i);
assign(buf, -1);
}
/** @brief Create a String containing the ASCII base-10 representation of @a u.
*/
String::String(unsigned long u)
{
char buf[128];
sprintf(buf, "%lu", u);
assign(buf, -1);
}
#if HAVE_INT64_TYPES && !HAVE_INT64_IS_LONG
// Implemented a lovely [u]int64_t converter in StringAccum
// (use the code even at user level to hunt out bugs)
/** @brief Create a String containing the ASCII base-10 representation of @a q.
*/
String::String(int64_t q)
{
StringAccum sa;
sa << q;
assign(sa.data(), sa.length());
}
/** @brief Create a String containing the ASCII base-10 representation of @a q.
*/
String::String(uint64_t q)
{
StringAccum sa;
sa << q;
assign(sa.data(), sa.length());
}
#endif
#ifdef CLICK_USERLEVEL
/** @brief Create a String containing the ASCII base-10 representation of @a d.
* @note This function is only available at user level.
*/
String::String(double d)
{
char buf[128];
int len = sprintf(buf, "%.12g", d);
assign(buf, len);
}
#endif
String
String::claim_string(char *str, int len, int capacity)
{
assert(str && len > 0 && capacity >= len);
Memo *new_memo = new Memo(str, len, capacity);
if (new_memo)
return String(str, len, new_memo);
else
return String(&oom_string_data, 0, oom_memo);
}
/** @brief Return a String that directly references the first @a len
* characters of @a s.
*
* This function is suitable for static constant strings whose data is known
* to stay around forever, such as C string constants. If @a len @< 0, treats
* @a s as a null-terminated C string.
*/
String
String::stable_string(const char *s, int len)
{
if (len < 0)
len = (s ? strlen(s) : 0);
return String(s, len, permanent_memo);
}
/** @brief Create and return a String containing @a len random characters. */
String
String::garbage_string(int len)
{
String s;
s.append_garbage(len);
return s;
}
void
String::make_out_of_memory()
{
if (_memo)
deref();
_memo = oom_memo;
_memo->_refcount++;
_data = _memo->_real_data;
_length = 0;
}
void
String::assign(const char *str, int len)
{
if (!str) {
assert(len <= 0);
len = 0;
} else if (len < 0)
len = strlen(str);
if (len == 0) {
_memo = (str == &oom_string_data ? oom_memo : null_memo);
_memo->_refcount++;
} else {
// Make 'capacity' a multiple of 16 characters and bigger than 'len'.
int capacity = (len + 16) & ~15;
_memo = new Memo(len, capacity);
if (!_memo || !_memo->_real_data) {
make_out_of_memory();
return;
}
memcpy(_memo->_real_data, str, len);
}
_data = _memo->_real_data;
_length = len;
}
/** @brief Append @a len random characters to this string. */
char *
String::append_garbage(int len)
{
// Appending anything to "out of memory" leaves it as "out of memory"
if (len <= 0 || _memo == oom_memo)
return 0;
// If we can, append into unused space. First, we check that there's
// enough unused space for `len' characters to fit; then, we check
// that the unused space immediately follows the data in `*this'.
if (_memo->_capacity > _memo->_dirty + len) {
char *real_dirty = _memo->_real_data + _memo->_dirty;
if (real_dirty == _data + _length) {
_length += len;
_memo->_dirty += len;
assert(_memo->_dirty < _memo->_capacity);
return real_dirty;
}
}
// Now we have to make new space. Make sure the new capacity is a
// multiple of 16 characters and that it is at least 16.
int new_capacity = (_length + 16) & ~15;
while (new_capacity < _length + len)
new_capacity *= 2;
Memo *new_memo = new Memo(_length + len, new_capacity);
if (!new_memo || !new_memo->_real_data) {
delete new_memo;
make_out_of_memory();
return 0;
}
char *new_data = new_memo->_real_data;
memcpy(new_data, _data, _length);
deref();
_data = new_data;
new_data += _length; // now new_data points to the garbage
_length += len;
_memo = new_memo;
return new_data;
}
/** @brief Append the first @a len characters of @a suffix to this string.
*
* @param suffix data to append
* @param len length of data
*
* If @a len @< 0, treats @a suffix as a null-terminated C string. */
void
String::append(const char *suffix, int len)
{
if (!suffix) {
assert(len <= 0);
len = 0;
} else if (len < 0)
len = strlen(suffix);
if (suffix == &oom_string_data)
// Appending "out of memory" to a regular string makes it "out of
// memory"
make_out_of_memory();
else if (char *space = append_garbage(len))
memcpy(space, suffix, len);
}
/** @brief Append @a len copies of the character @a c to this string. */
void
String::append_fill(int c, int len)
{
assert(len >= 0);
if (char *space = append_garbage(len))
memset(space, c, len);
}
/** @brief Ensure the string's data is unshared and return a mutable pointer
* to it. */
char *
String::mutable_data()
{
// If _memo has a capacity (it's not one of the special strings) and it's
// uniquely referenced, return _data right away.
if (_memo->_capacity && _memo->_refcount == 1)
return const_cast<char *>(_data);
// Otherwise, make a copy of it. Rely on: deref() doesn't change _data or
// _length; and if _capacity == 0, then deref() doesn't free _real_data.
assert(!_memo->_capacity || _memo->_refcount > 1);
deref();
assign(_data, _length);
return const_cast<char *>(_data);
}
/** @brief Null-terminates the string and returns a mutable pointer to its
* data.
* @sa String::c_str */
char *
String::mutable_c_str()
{
(void) mutable_data();
(void) c_str();
return const_cast<char *>(_data);
}
/** @brief Null-terminates the string.
*
* The terminating null character isn't considered part of the string, so
* this->length() doesn't change. Returns a corresponding C string pointer.
* The returned pointer is semi-temporary; it will persist until the string is
* destroyed, or someone appends to it. */
const char *
String::c_str() const
{
// If _memo has no capacity, then this is one of the special strings (null
// or PermString). We are guaranteed, in these strings, that _data[_length]
// exists. We can return _data immediately if we have a '\0' in the right
// place.
if (!_memo->_capacity && _data[_length] == '\0')
return _data;
// Otherwise, this invariant must hold (there's more real data in _memo than
// in our substring).
assert(!_memo->_capacity
|| _memo->_real_data + _memo->_dirty >= _data + _length);
// Has the character after our substring been set?
if (_memo->_real_data + _memo->_dirty == _data + _length) {
// Character after our substring has not been set. May be able to change
// it to '\0'. This case will never occur on special strings.
if (_memo->_dirty < _memo->_capacity)
goto add_final_nul;
} else {
// Character after our substring has been set. OK to return _data if it is
// already '\0'.
if (_data[_length] == '\0')
return _data;
}
// If we get here, we must make a copy of our portion of the string.
{
String s(_data, _length);
deref();
assign(s);
}
add_final_nul:
char *real_data = const_cast<char *>(_data);
real_data[_length] = '\0';
_memo->_dirty++; // include '\0' in used portion of _memo
return _data;
}
/** @brief Returns a substring of this string, consisting of the @a len
* characters starting at index @a pos.
*
* @param pos substring's first position relative to the string.
* @param len length of the substring.
*
* If @a pos is negative, starts that far from the end of the string. If @a
* len is negative, leaves that many characters off the end of the string.
* (This follows perl's semantics.) Returns a null string if the adjusted @a
* pos is out of range. Truncates the substring if @a len goes beyond the end
* of the string.
*/
String
String::substring(int pos, int len) const
{
if (pos < 0)
pos += _length;
if (len < 0)
len = _length - pos + len;
if (pos + len > _length)
len = _length - pos;
if (pos < 0 || len <= 0)
return String();
else
return String(_data + pos, len, _memo);
}
/** @brief Search for a character in a string.
*
* @param c character to search for
* @param start initial search position
*
* Return the index of the leftmost occurence of @a c, starting at index @a
* start and working up to the end of the string. Returns -1 if @a c is not
* found. */
int
String::find_left(char c, int start) const
{
if (start < 0)
start = 0;
for (int i = start; i < _length; i++)
if (_data[i] == c)
return i;
return -1;
}
/** @brief Search for a substring in a string.
*
* @param str substring to search for
* @param start initial search position
*
* Return the index of the leftmost occurence of the substring @a str, starting
* at index @a start and working up to the end of the string. Returns -1 if
* @a str is not found. */
int
String::find_left(const String &str, int start) const
{
if (start < 0)
start = 0;
if (start >= length())
return -1;
if (!str.length())
return 0;
int first_c = (unsigned char)str[0];
int pos = start, max_pos = length() - str.length();
for (pos = find_left(first_c, pos); pos >= 0 && pos <= max_pos;
pos = find_left(first_c, pos + 1))
if (!memcmp(_data + pos, str._data, str.length()))
return pos;
return -1;
}
/** @brief Search for a character in a string.
*
* @param c character to search for
* @param start initial search position
*
* Return the index of the rightmost occurence of the character @a c, starting
* at index @a start and working back to the beginning of the string. Returns
* -1 if @a c is not found. @a start may start beyond the end of the
* string. */
int
String::find_right(char c, int start) const
{
if (start >= _length)
start = _length - 1;
for (int i = start; i >= 0; i--)
if (_data[i] == c)
return i;
return -1;
}
static String
hard_lower(const String &s, int pos)
{
String new_s(s.data(), s.length());
char *x = const_cast<char *>(new_s.data()); // know it's mutable
int len = s.length();
for (; pos < len; pos++)
x[pos] = tolower(x[pos]);
return new_s;
}
/** @brief Returns a lowercased version of this string.
*
* Translates the ASCII characters 'A' through 'Z' into their lowercase
* equivalents. */
String
String::lower() const
{
// avoid copies
for (int i = 0; i < _length; i++)
if (_data[i] >= 'A' && _data[i] <= 'Z')
return hard_lower(*this, i);
return *this;
}
static String
hard_upper(const String &s, int pos)
{
String new_s(s.data(), s.length());
char *x = const_cast<char *>(new_s.data()); // know it's mutable
int len = s.length();
for (; pos < len; pos++)
x[pos] = toupper(x[pos]);
return new_s;
}
/** @brief Returns an uppercased version of this string.
*
* Translates the ASCII characters 'a' through 'z' into their uppercase
* equivalents. */
String
String::upper() const
{
// avoid copies
for (int i = 0; i < _length; i++)
if (_data[i] >= 'a' && _data[i] <= 'z')
return hard_upper(*this, i);
return *this;
}
static String
hard_printable(const String &s, int pos)
{
StringAccum sa(s.length() * 2);
sa.append(s.data(), pos);
const unsigned char *x = reinterpret_cast<const unsigned char *>(s.data());
int len = s.length();
for (; pos < len; pos++) {
if (x[pos] >= 32 && x[pos] < 127)
sa << x[pos];
else if (x[pos] < 32)
sa << '^' << (unsigned char)(x[pos] + 64);
else if (char *buf = sa.extend(4, 1))
sprintf(buf, "\\%03o", x[pos]);
}
return sa.take_string();
}
/** @brief Returns a "printable" version of this string.
*
* Translates control characters 0-31 into "control" sequences, such as "^@"
* for the null character, and characters 127-255 into octal escape sequences,
* such as "\377" for 255. */
String
String::printable() const
{
// avoid copies
for (int i = 0; i < _length; i++)
if (_data[i] < 32 || _data[i] > 126)
return hard_printable(*this, i);
return *this;
}
/** @brief Returns a substring with spaces trimmed from the end. */
String
String::trim_space() const
{
for (int i = _length - 1; i >= 0; i--)
if (!isspace(_data[i]))
return substring(0, i + 1);
// return out-of-memory string if input is out-of-memory string
return (_length ? String() : *this);
}
/** @brief Returns a hex-quoted version of the string.
*
* For example, the string "Abcd" would convert to "\<41626364>". */
String
String::quoted_hex() const
{
static const char hex_digits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
StringAccum sa;
char *buf;
if (out_of_memory() || !(buf = sa.extend(length() * 2 + 3)))
return out_of_memory_string();
*buf++ = '\\';
*buf++ = '<';
const uint8_t *e = reinterpret_cast<const uint8_t*>(end());
for (const uint8_t *x = reinterpret_cast<const uint8_t*>(begin()); x < e; x++) {
*buf++ = hex_digits[(*x >> 4) & 0xF];
*buf++ = hex_digits[*x & 0xF];
}
*buf++ = '>';
return sa.take_string();
}
int
hashcode(const String &s)
{
int length = s.length();
const char *data = s.data();
if (!length)
return 0;
else if (length == 1)
return data[0] | (data[0] << 8);
else if (length < 4)
return data[0] + (data[1] << 3) + (length << 12);
else
return data[0] + (data[1] << 8) + (data[2] << 16) + (data[3] << 24)
+ (length << 12) + (data[length-1] << 10);
}
/** @brief Return true iff this string is equal to the data in @a s.
* @param s string data to compare to
* @param len length of @a s
*
* Same as String::compare(*this, String(s, len)) == 0. If @a len @< 0, then
* treats @a s as a null-terminated C string.
*
* @sa String::compare(const String &a, const String &b) */
bool
String::equals(const char *s, int len) const
{
// It'd be nice to make "out-of-memory" strings compare unequal to
// anything, even themseleves, but this would be a bad idea for Strings
// used as (for example) keys in hashtables. Instead, "out-of-memory"
// strings compare unequal to other null strings, but equal to each other.
if (len < 0)
len = strlen(s);
if (_length != len)
return false;
else if (_data == s)
return true;
else if (len == 0)
return (s != &oom_string_data && _memo != oom_memo);
else
return memcmp(_data, s, len) == 0;
}
/** @brief Compare this string with the data in @a s.
* @param s string data to compare to
* @param len length of @a s
*
* Same as String::compare(*this, String(s, len)). If @a len @< 0, then treats
* @a s as a null-terminated C string.
*
* @sa String::compare(const String &a, const String &b) */
int
String::compare(const char *s, int len) const
{
if (len < 0)
len = strlen(s);
if (_data == s)
return _length - len;
else if (_memo == oom_memo)
return 1;
else if (s == &oom_string_data)
return -1;
else if (_length == len)
return memcmp(_data, s, len);
else if (_length < len) {
int v = memcmp(_data, s, _length);
return (v ? v : -1);
} else {
int v = memcmp(_data, s, len);
return (v ? v : 1);
}
}
/** @class String::Initializer
* @brief Initializes the String implementation.
*
* This class's constructor initializes the String implementation by calling
* String::static_initialize(). You should declare a String::Initializer
* object at global scope in any file that declares a global String object.
* For example:
* @code
* static String::Initializer initializer;
* String global_string = "100";
* @endcode */
String::Initializer::Initializer()
{
String::static_initialize();
}
/** @brief Initialize the String implementation.
*
* This function must be called before any String functionality is used. It
* is safe to call it multiple times.
*
* @note Elements don't need to worry about static_initialize(); Click drivers
* have already called it for you.
*
* @sa String::Initializer */
void
String::static_initialize()
{
// function called to initialize static globals
if (!null_memo) {
#if CLICK_DMALLOC
CLICK_DMALLOC_REG("str0");
#endif
null_memo = new Memo;
null_memo->_refcount++;
permanent_memo = new Memo;
permanent_memo->_refcount++;
// use a separate string for oom_memo's data, so we can distinguish
// the pointer
oom_memo = new Memo;
oom_memo->_refcount++;
oom_memo->_real_data = const_cast<char*>(&oom_string_data);
null_string_p = new String;
oom_string_p = new String(&oom_string_data, 0, oom_memo);
#if CLICK_DMALLOC
CLICK_DMALLOC_REG("????");
#endif
}
}
/** @brief Clean up the String implementation.
*
* Call this function to release any memory allocated by the String
* implementation. */
void
String::static_cleanup()
{
if (null_string_p) {
delete null_string_p;
null_string_p = 0;
delete oom_string_p;
oom_string_p = 0;
if (--oom_memo->_refcount == 0)
delete oom_memo;
if (--permanent_memo->_refcount == 0)
delete permanent_memo;
if (--null_memo->_refcount == 0)
delete null_memo;
null_memo = permanent_memo = oom_memo = 0;
}
}
CLICK_ENDDECLS
syntax highlighted by Code2HTML, v. 0.9.1