/*
 *	vii - buffer and display output
 *	Copyright (C) 1991-1995, 1999, 2005 Peter Miller
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 2 of the License, or
 *	(at your option) any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 *
 * MANIFEST: string manipulation functions
 *
 * A literal pool is maintained.  Each string has a reference count.  The
 * string stays in the literal pool for as long as it has a positive
 * reference count.  To determine if a string is already in the literal pool,
 * linear dynamic hashing is used to guarantee an O(1) search.  Making all equal
 * strings the same item in the literal pool means that string equality is
 * a pointer test, and thus very fast.
 */

#include <ac/ctype.h>
#include <ac/stdarg.h>
#include <ac/stddef.h>
#include <ac/stdio.h>
#include <ac/stdlib.h>
#include <ac/string.h>

#include <error.h>
#include <mem.h>
#include <mprintf.h>
#include <str.h>


/*
 * maximum conversion width for numbers
 */
#define MAX_WIDTH 509

static	string_ty	**hash_table;
static	str_hash_ty	hash_modulus;
static	str_hash_ty	hash_cutover;
static	str_hash_ty	hash_cutover_mask;
static	str_hash_ty	hash_cutover_split_mask;
static	str_hash_ty	hash_split;
static	str_hash_ty	hash_load;
static	int		changed;

#define MAX_HASH_LEN 20


/*
 * NAME
 *	hash_generate - hash string to number
 *
 * SYNOPSIS
 *	str_hash_ty hash_generate(char *s, size_t n);
 *
 * DESCRIPTION
 *	The hash_generate function is used to make a number from a string.
 *
 * RETURNS
 *	str_hash_ty - the magic number
 *
 * CAVEAT
 *	Only the last MAX_HASH_LEN characters are used.
 *	It is important that str_hash_ty be unsigned (int or long).
 */

static str_hash_ty hash_generate _((char *, size_t));

static str_hash_ty
hash_generate(s, n)
	char		*s;
	size_t		n;
{
	str_hash_ty	retval;

	if (n > MAX_HASH_LEN)
	{
		s += n - MAX_HASH_LEN;
		n = MAX_HASH_LEN;
	}

	retval = 0;
	while (n > 0)
	{
		retval = (retval + (retval << 1)) ^ *s++;
		--n;
	}
	return retval;
}


/*
 * NAME
 *	str_initialize - start up string table
 *
 * SYNOPSIS
 *	void str_initialize(void);
 *
 * DESCRIPTION
 *	The str_initialize function is used to create the hash table and
 *	initialize it to empty.
 *
 * RETURNS
 *	void
 *
 * CAVEAT
 *	This function must be called before any other defined in this file.
 */

void
str_initialize()
{
	str_hash_ty	j;

	hash_modulus = 1<<8; /* MUST be a power of 2 */
	hash_cutover = hash_modulus;
	hash_split = hash_modulus - hash_cutover;
	hash_cutover_mask = hash_cutover - 1;
	hash_cutover_split_mask = (hash_cutover * 2) - 1;
	hash_load = 0;
	hash_table = (string_ty **)mem_alloc(hash_modulus * sizeof(string_ty *));
	for (j = 0; j < hash_modulus; ++j)
		hash_table[j] = 0;
}


/*
 * NAME
 *	split - reduce table loading
 *
 * SYNOPSIS
 *	void split(void);
 *
 * DESCRIPTION
 *	The split function is used to reduce the load factor on the hash table.
 *
 * RETURNS
 *	void
 *
 * CAVEAT
 *	A load factor of about 80% is suggested.
 */

static void split _((void));

static void
split()
{
	string_ty	*p;
	string_ty	*p2;
	str_hash_ty	idx;

	/*
	 * get the list to be split across buckets
	 */
	p = hash_table[hash_split];
	hash_table[hash_split] = 0;

	/*
	 * increase the modulus by one
	 */
	hash_modulus++;
	hash_table =
		mem_change_size(hash_table, hash_modulus * sizeof(string_ty*));
	hash_table[hash_modulus - 1] = 0;
	hash_split = hash_modulus - hash_cutover;
	if (hash_split >= hash_cutover)
	{
		hash_cutover = hash_modulus;
		hash_split = 0;
		hash_cutover_mask = hash_cutover - 1;
		hash_cutover_split_mask = (hash_cutover * 2) - 1;
	}

	/*
	 * now redistribute the list elements
	 */
	while (p)
	{
		p2 = p;
		p = p->str_next;

		idx = p2->str_hash & hash_cutover_mask;
		if (idx < hash_split)
			idx = p2->str_hash & hash_cutover_split_mask;
		p2->str_next = hash_table[idx];
		hash_table[idx] = p2;
	}
}


/*
 * NAME
 *	str_from_c - make string from C string
 *
 * SYNOPSIS
 *	string_ty *str_from_c(char*);
 *
 * DESCRIPTION
 *	The str_from_c function is used to make a string from a null terminated
 *	C string.
 *
 * RETURNS
 *	string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *	finished with.
 *
 * CAVEAT
 *	The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_from_c(s)
	char		*s;
{
	return str_n_from_c(s, strlen(s));
}


/*
 * NAME
 *	str_n_from_c - make string
 *
 * SYNOPSIS
 *	string_ty *str_n_from_c(char *s, size_t n);
 *
 * DESCRIPTION
 *	The str_n_from_c function is used to make a string from an array of
 *	characters.  No null terminator is assumed.
 *
 * RETURNS
 *	string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *	finished with.
 *
 * CAVEAT
 *	The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_n_from_c(s, length)
	char		*s;
	size_t		length;
{
	str_hash_ty	hash;
	str_hash_ty	idx;
	string_ty	*p;

	hash = hash_generate(s, length);

	idx = hash & hash_cutover_mask;
	if (idx < hash_split)
		idx = hash & hash_cutover_split_mask;

	for (p = hash_table[idx]; p; p = p->str_next)
	{
		if
		(
			p->str_hash == hash
		&&
			p->str_length == length
		&&
			!memcmp(p->str_text, s, length)
		)
		{
			p->str_references++;
			return p;
		}
	}

	p = (string_ty *)mem_alloc(sizeof(string_ty) + length);
	p->str_hash = hash;
	p->str_length = length;
	p->str_references = 1;
	p->str_next = hash_table[idx];
	hash_table[idx] = p;
	memcpy(p->str_text, s, length);
	p->str_text[length] = 0;

	hash_load++;
	while (hash_load * 10 > hash_modulus * 8)
		split();
	++changed;
	return p;
}


/*
 * NAME
 *	str_copy - make a copy of a string
 *
 * SYNOPSIS
 *	string_ty *str_copy(string_ty *s);
 *
 * DESCRIPTION
 *	The str_copy function is used to make a copy of a string.
 *
 * RETURNS
 *	string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *	finished with.
 *
 * CAVEAT
 *	The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_copy(s)
	string_ty	*s;
{
	s->str_references++;
	return s;
}


/*
 * NAME
 *	str_free - release a string
 *
 * SYNOPSIS
 *	void str_free(string_ty *s);
 *
 * DESCRIPTION
 *	The str_free function is used to indicate that a string hash been
 *	finished with.
 *
 * RETURNS
 *	void
 *
 * CAVEAT
 *	This is the only way to release strings DO NOT use the free function.
 */

void
str_free(s)
	string_ty	*s;
{
	str_hash_ty	idx;
	string_ty	**spp;

	if (!s)
		return;
	if (s->str_references > 1)
	{
		s->str_references--;
		return;
	}
	++changed;

	/*
	 * find the hash bucket it was in,
	 * and remove it
	 */
	idx = s->str_hash & hash_cutover_mask;
	if (idx < hash_split)
		idx = s->str_hash & hash_cutover_split_mask;
	for (spp = &hash_table[idx]; *spp; spp = &(*spp)->str_next)
	{
		if (*spp == s)
		{
			*spp = s->str_next;
			free(s);
			--hash_load;
			return;
		}
	}

	/*
	 * should never reach here!
	 */
	fatal("attempted to free non-existent string (bug)");
}


/*
 * NAME
 *	str_catenate - join two strings
 *
 * SYNOPSIS
 *	string_ty *str_catenate(string_ty *, string_ty *);
 *
 * DESCRIPTION
 *	The str_catenate function is used to concatenate two strings to form a
 *	new string.
 *
 * RETURNS
 *	string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *	finished with.
 *
 * CAVEAT
 *	The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_catenate(s1, s2)
	string_ty	*s1;
	string_ty	*s2;
{
	static char	*tmp;
	static size_t	tmplen;
	string_ty	*s;
	size_t		length;

	length = s1->str_length + s2->str_length;
	if (!tmp)
	{
		tmplen = length;
		if (tmplen < 16)
			tmplen = 16;
		tmp = mem_alloc(tmplen);
	}
	else
	{
		if (tmplen < length)
		{
			tmplen = length;
			tmp = mem_change_size(tmp, tmplen);
		}
	}
	memcpy(tmp, s1->str_text, s1->str_length);
	memcpy(tmp + s1->str_length, s2->str_text, s2->str_length);
	s = str_n_from_c(tmp, length);
	return s;
}


/*
 * NAME
 *	str_cat_three - join three strings
 *
 * SYNOPSIS
 *	string_ty *str_cat_three(string_ty *, string_ty *, string_ty *);
 *
 * DESCRIPTION
 *	The str_cat_three function is used to concatenate three strings to form
 *	a new string.
 *
 * RETURNS
 *	string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *	finished with.
 *
 * CAVEAT
 *	The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_cat_three(s1, s2, s3)
	string_ty	*s1;
	string_ty	*s2;
	string_ty	*s3;
{
	static char	*tmp;
	static size_t	tmplen;
	string_ty	*s;
	size_t		length;

	length = s1->str_length + s2->str_length + s3->str_length;
	if (!tmp)
	{
		tmplen = length;
		if (tmplen < 16)
			tmplen = 16;
		tmp = mem_alloc(tmplen);
	}
	else
	{
		if (tmplen < length)
		{
			tmplen = length;
			tmp = mem_change_size(tmp, tmplen);
		}
	}
	memcpy(tmp, s1->str_text, s1->str_length);
	memcpy(tmp + s1->str_length, s2->str_text, s2->str_length);
	memcpy
	(
		tmp + s1->str_length + s2->str_length,
		s3->str_text,
		s3->str_length
	);
	s = str_n_from_c(tmp, length);
	return s;
}


/*
 * NAME
 *	str_equal - test equality of strings
 *
 * SYNOPSIS
 *	int str_equal(string_ty *, string_ty *);
 *
 * DESCRIPTION
 *	The str_equal function is used to test if two strings are equal.
 *
 * RETURNS
 *	int; zero if the strings are not equal, nonzero if the strings are
 *	equal.
 *
 * CAVEAT
 *	This function is implemented as a macro in strings.h
 */

#ifndef str_equal

int
str_equal(s1, s2)
	string_ty	*s1;
	string_ty	*s2;
{
	return (s1 == s2);
}

#endif


/*
 * NAME
 *	str_upcase - upcase a string
 *
 * SYNOPSIS
 *	string_ty *str_upcase(string_ty *);
 *
 * DESCRIPTION
 *	The str_upcase function is used to form a string which is an upper case
 *	form of the supplied string.
 *
 * RETURNS
 *	string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *	finished with.
 *
 * CAVEAT
 *	The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_upcase(s)
	string_ty	*s;
{
	static char	*tmp;
	static size_t	tmplen;
	string_ty	*retval;
	char		*cp1;
	char		*cp2;

	if (!tmp)
	{
		tmplen = s->str_length;
		if (tmplen < 16)
			tmplen = 16;
		tmp = mem_alloc(tmplen);
	}
	else
	{
		if (tmplen < s->str_length)
		{
			tmplen = s->str_length;
			tmp = mem_change_size(tmp, tmplen);
		}
	}
	for (cp1 = s->str_text, cp2 = tmp; *cp1; ++cp1, ++cp2)
	{
		int c;

		c = *cp1;
		if (c >= 'a' && c <= 'z')
			c += 'A' - 'a';
		*cp2 = c;
	}
	retval = str_n_from_c(tmp, s->str_length);
	return retval;
}


/*
 * NAME
 *	str_downcase - lowercase string
 *
 * SYNOPSIS
 *	string_ty *str_downcase(string_ty *);
 *
 * DESCRIPTION
 *	The str_downcase function is used to form a string which is a lowercase
 *	form of the supplied string.
 *
 * RETURNS
 *	string_ty* - a pointer to a string in dynamic memory.  Use str_free when
 *	finished with.
 *
 * CAVEAT
 *	The contents of the structure pointed to MUST NOT be altered.
 */

string_ty *
str_downcase(s)
	string_ty	*s;
{
	static char	*tmp;
	static size_t	tmplen;
	string_ty	*retval;
	char		*cp1;
	char		*cp2;

	if (!tmp)
	{
		tmplen = s->str_length;
		if (tmplen < 16)
			tmplen = 16;
		tmp = mem_alloc(tmplen);
	}
	else
	{
		if (tmplen < s->str_length)
		{
			tmplen = s->str_length;
			tmp = mem_change_size(tmp, tmplen);
		}
	}
	for (cp1 = s->str_text, cp2 = tmp; *cp1; ++cp1, ++cp2)
	{
		int c;

		c = *cp1;
		if (c >= 'A' && c <= 'Z')
			c += 'a' - 'A';
		*cp2 = c;
	}
	retval = str_n_from_c(tmp, s->str_length);
	return retval;
}


/*
 * NAME
 *	str_bool - get boolean value
 *
 * SYNOPSIS
 *	int str_bool(string_ty *s);
 *
 * DESCRIPTION
 *	The str_bool function is used to determine the boolean value of the
 *	given string.  A "false" result is if the string is empty or
 *	0 or blank, and "true" otherwise.
 *
 * RETURNS
 *	int: zero to indicate a "false" result, nonzero to indicate a "true"
 *	result.
 */

int
str_bool(s)
	string_ty	*s;
{
	char		*cp;

	cp = s->str_text;
	while (*cp)
	{
		if (!isspace(*cp) && *cp != '0')
			return 1;
		++cp;
	}
	return 0;
}


/*
 * NAME
 *	str_field - extract a field from a string
 *
 * SYNOPSIS
 *	string_ty *str_field(string_ty *, char separator, int field_number);
 *
 * DESCRIPTION
 *	The str_field functipon is used to erxtract a field from a string.
 *	Fields of the string are separated by ``separator'' characters.
 *	Fields are numbered from 0.
 *
 * RETURNS
 *	Asking for a field off the end of the string will result in a null
 *	pointer return.  The null string is considered to have one empty field.
 */

string_ty *
str_field(s, sep, fldnum)
	string_ty	*s;
	int		sep;
	int		fldnum;
{
	char		*cp;
	char		*ep;

	cp = s->str_text;
	while (fldnum > 0)
	{
		ep = strchr(cp, sep);
		if (!ep)
			return 0;
		cp = ep + 1;
		--fldnum;
	}
	ep = strchr(cp, sep);
	if (ep)
		return str_n_from_c(cp, ep - cp);
	return str_from_c(cp);
}


void
slow_to_fast(in, out, length)
	char		**in;
	string_ty	**out;
	size_t		length;
{
	size_t		j;

	if (out[0])
		return;
	for (j = 0; j < length; ++j)
		out[j] = str_from_c(in[j]);
}


string_ty *
str_format(fmt sva_last)
	char		*fmt;
	sva_last_decl
{
	va_list		ap;
	string_ty	*result;

	sva_init(ap, fmt);
	result = vmprintf_str(fmt, ap);
	va_end(ap);
	return result;
}


string_ty *
str_vformat(fmt, ap)
	char		*fmt;
	va_list		ap;
{
	return vmprintf_str(fmt, ap);
}


syntax highlighted by Code2HTML, v. 0.9.1