/* vi:ts=4:sw=4
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
*
* This is NOT the original regular expression code as written by
* Henry Spencer. This code has been modified specifically for use
* with the VIM editor, and should not be used apart from compiling
* VIM. If you want a good regular expression library, get the
* original code. The copyright notice that follows is from the
* original.
*
* Furthermore, this code has been modified to handle Japanese characters.
*
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
*
* regsub
*
* Copyright (c) 1986 by University of Toronto.
* Written by Henry Spencer. Not derived from licensed software.
*
* Permission is granted to anyone to use this software for any
* purpose on any computer system, and to redistribute it freely,
* subject to the following restrictions:
*
* 1. The author is not responsible for the consequences of use of
* this software, no matter how awful, even if they arise
* from defects in it.
*
* 2. The origin of this software must not be misrepresented, either
* by explicit claim or by omission.
*
* 3. Altered versions must be plainly marked as such, and must not
* be misrepresented as being the original software.
*
* $Log: regsub.c,v $
* Revision 1.2 88/04/28 08:11:25 tony
* First modification of the regexp library. Added an external variable
* 'reg_ic' which can be set to indicate that case should be ignored.
* Added a new parameter to regexec() to indicate that the given string
* comes from the beginning of a line and is thus eligible to match
* 'beginning-of-line'.
*
* Revisions by Olaf 'Rhialto' Seibert, rhialto@cs.kun.nl:
* Changes for vi: (the semantics of several things were rather different)
* - Added lexical analyzer, because in vi magicness of characters
* is rather difficult, and may change over time.
* - Added support for \< \> \1-\9 and ~
* - Left some magic stuff in, but only backslashed: \| \+
* - * and \+ still work after \) even though they shouldn't.
*/
#include "vim.h"
#include "globals.h"
#include "proto.h"
#ifdef JP
#include "jp.h"
#include "ops.h"
#endif
#ifdef MSDOS
# define __ARGS(a) a
#endif
#define CASECONVERT
#include <stdio.h>
#include "regexp.h"
#include "regmagic.h"
#ifdef LATTICE
# include <sys/types.h> /* for size_t */
#endif
#ifndef CHARBITS
#define UCHARAT(p) ((int)*(unsigned char *)(p))
#else
#define UCHARAT(p) ((int)*(p)&CHARBITS)
#endif
extern char *reg_prev_sub;
#ifdef CASECONVERT
/*
* We should define fptr as a pointer to a function returning a pointer to
* a function returning a pointer to a function ...
* This is impossible, so we declare a pointer to a function returning a
* pointer to a function returning void. This should work for all compilers.
*
* Each do_xxx() function below handles one character: it stores a (possibly
* case-converted) copy of the character at its second argument into its
* first argument and returns the function to be used for the next character.
* Because of the typedef trick above, results are cast back to fptr.
*/
typedef void (*(*fptr) __ARGS((char *, char *)))();
/* apply a conversion function to at most 'int' bytes; returns next function */
static fptr strnfcpy __ARGS((fptr, char *, char *, int));
/* plain copy; keeps returning itself */
static fptr do_copy __ARGS((char *, char *));
/* upper-case one character, then fall back to do_copy */
static fptr do_upper __ARGS((char *, char *));
/* upper-case every character; keeps returning itself */
static fptr do_Upper __ARGS((char *, char *));
/* lower-case one character, then fall back to do_copy */
static fptr do_lower __ARGS((char *, char *));
/* lower-case every character; keeps returning itself */
static fptr do_Lower __ARGS((char *, char *));
/*
 * do_copy: copy the character at "s" to "d" without any conversion.
 *
 * With JP defined, a character for which IsKanji() is true is copied as two
 * bytes (presumably the lead byte of a two-byte character).
 *
 * Returns do_copy, so the following characters are copied verbatim as well.
 */
static fptr
do_copy(d, s)
	char	*d;
	char	*s;
{
#ifdef JP
	if (IsKanji(s[0]))
	{
		/* two-byte character: take both bytes unchanged */
		d[0] = s[0];
		d[1] = s[1];
		return (fptr)do_copy;
	}
#endif
	d[0] = s[0];
	return (fptr)do_copy;
}
/*
 * do_upper: store the upper-case version of the character at "s" in "d".
 *
 * This is the one-shot conversion: it returns do_copy, so only a single
 * character is converted and the rest is copied unchanged.
 *
 * With JP defined, a character for which IsKanji() is true is copied as two
 * bytes and then handed to jptocase() with UPPER (presumably converting the
 * two-byte character in place).
 */
static fptr
do_upper(d, s)
	char	*d;
	char	*s;
{
#ifdef JP
	if (IsKanji(s[0]))
	{
		d[0] = s[0];
		d[1] = s[1];
		jptocase(d, d + 1, UPPER);
		return (fptr)do_copy;
	}
#endif
	d[0] = TO_UPPER(s[0]);
	return (fptr)do_copy;
}
/*
 * do_Upper: store the upper-case version of the character at "s" in "d".
 *
 * This is the sticky conversion: it returns do_Upper, so every following
 * character is converted too, until the caller selects another function.
 */
static fptr
do_Upper(d, s)
	char	*d;
	char	*s;
{
#ifndef JP
	d[0] = TO_UPPER(s[0]);
#else
	/* let do_upper() deal with two-byte characters; its result is not used */
	(void)do_upper(d, s);
#endif
	return (fptr)do_Upper;
}
/*
 * do_lower: store the lower-case version of the character at "s" in "d".
 *
 * This is the one-shot conversion: it returns do_copy, so only a single
 * character is converted and the rest is copied unchanged.
 *
 * With JP defined, a character for which IsKanji() is true is copied as two
 * bytes and then handed to jptocase() with LOWER (presumably converting the
 * two-byte character in place).
 */
static fptr
do_lower(d, s)
	char	*d;
	char	*s;
{
#ifdef JP
	if (IsKanji(s[0]))
	{
		d[0] = s[0];
		d[1] = s[1];
		jptocase(d, d + 1, LOWER);
		return (fptr)do_copy;
	}
#endif
	d[0] = TO_LOWER(s[0]);
	return (fptr)do_copy;
}
/*
 * do_Lower: store the lower-case version of the character at "s" in "d".
 *
 * This is the sticky conversion: it returns do_Lower, so every following
 * character is converted too, until the caller selects another function.
 */
static fptr
do_Lower(d, s)
	char	*d;
	char	*s;
{
#ifndef JP
	d[0] = TO_LOWER(s[0]);
#else
	/* let do_lower() deal with two-byte characters; its result is not used */
	(void)do_lower(d, s);
#endif
	return (fptr)do_Lower;
}
/*
 * strnfcpy: like strncpy(), but every character goes through a conversion
 * function.
 *
 * "f" is applied to each character of "s", storing the result in "d"; the
 * function returned by each call is used for the next character.  At most
 * "n" bytes are handled.  Copying stops after a NUL has been passed through
 * "f"; unlike strncpy() the remainder of "d" is not padded.
 *
 * Returns the conversion function to be used for whatever follows.
 */
static fptr
strnfcpy(f, d, s, n)
	fptr	f;
	char	*d;
	char	*s;
	int		n;
{
	for ( ; n > 0; --n)
	{
#ifdef JP
		if (IsKanji(*s))
		{
			f = (fptr)(f(d, s));
			d += 2;
			s += 2;
			--n;			/* this character used up two bytes */
			continue;
		}
#endif
		f = (fptr)(f(d, s));	/* Turbo C complains without the typecast */
		if (*s == '\0')
			break;
		++s;
		++d;
	}
	return f;
}
#endif
/*
 * regtilde: replace tildes in the pattern by the old pattern
 *
 * Short explanation of the tilde: it stands for the previous replacement
 * pattern. If that previous pattern also contains a ~ we should go back
 * a step further... but we insert the previous pattern into the current one
 * and remember that.
 * This still does not handle the case where "magic" changes. TODO?
 *
 * New solution: The tildes are parsed once before the first call to regsub().
 * In the old solution (tilde handled in regsub()) it was possible to get an
 * endless loop.
 *
 * source: the replacement string as typed.  When there is no previous
 *		   replacement string, tildes are removed from it in place.
 * magic:  when non-zero a bare '~' is the tilde, otherwise '\~' is.
 *
 * Returns the string to pass to regsub().  If a tilde was expanded this is
 * a newly allocated string that is also stored in reg_prev_sub, which is
 * freed by the next call; the caller must not free it.  Otherwise "source"
 * itself is returned and a copy of it is stored in reg_prev_sub.
 */
char *
regtilde(source, magic)
	char	*source;
	int		magic;
{
	char	*cur = source;		/* string being scanned: source or newsub */
	char	*newsub = NULL;		/* latest expanded copy, NULL if none yet */
	char	*tmpsub;
	char	*p;
	char	*q;
	int		len;
	int		prevlen;
	int		tildelen;			/* bytes occupied by the tilde: "~" or "\~" */

	p = cur;
	while (*p)
	{
		if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
		{
			tildelen = magic ? 1 : 2;
			if (reg_prev_sub)
			{
				/*
				 * Sizes and offsets must be taken from the string that is
				 * being scanned: after the first expansion that is newsub,
				 * no longer source.
				 * length = len(current) - 1 + len(previous) + 1
				 */
				prevlen = (int)strlen(reg_prev_sub);
				tmpsub = alloc((unsigned)(strlen(cur) + prevlen));
				if (tmpsub)
				{
					/* copy prefix */
					len = (int)(p - cur);		/* not including ~ */
					strncpy(tmpsub, cur, (size_t)len);
					/* interpret tilde */
					strcpy(tmpsub + len, reg_prev_sub);
					/* copy postfix, leaving out the "~" or "\~" */
					strcpy(tmpsub + len + prevlen, p + tildelen);

					free(newsub);		/* the old copy is no longer needed */
					newsub = tmpsub;
					cur = newsub;
					/* go on right after the inserted text */
					p = cur + len + prevlen;
				}
				else
					p += tildelen;		/* out of memory: leave the tilde */
			}
			else
			{
				/*
				 * No previous pattern: remove the tilde.  The regions
				 * overlap, thus strcpy() cannot be used; copy by hand so
				 * that this works with every compiler.
				 */
				for (q = p; (*q = q[tildelen]) != '\0'; ++q)
					;
			}
			/*
			 * "p" already points at the next character to inspect; it must
			 * not be advanced again, or a tilde or backslash that directly
			 * follows would be skipped.
			 */
			continue;
		}
		if (*p == '\\' && p[1])			/* skip escaped characters */
			++p;
		++p;
	}

	free(reg_prev_sub);
	if (newsub)
	{
		source = newsub;
		reg_prev_sub = newsub;
	}
	else
		reg_prev_sub = strsave(source);
	return source;
}
/*
 - regsub - perform substitutions after a regexp match
 *
 * prog:   the compiled pattern holding the result of the last match; the
 *		   startp[]/endp[] pairs delimit the matched (sub)strings.
 * source: the replacement string.  Recognized items:
 *			   &  (magic) or \&  (not magic)	the whole match
 *			   \0 .. \9							the whole match / sub-match
 *			   \u \l		convert the next character to upper/lower case
 *			   \U \L		convert all following text to upper/lower case
 *			   \e \E		end of \U and \L
 *		   Any other backslashed character is taken literally.  A backslash
 *		   at the very end of the string stands for itself.
 * dest:   where the result is stored; only written when "copy" is non-zero.
 * copy:   when zero only the size of the result is computed.
 * magic:  selects whether '&' or '\&' stands for the whole match.
 *
 * Returns the size of the replacement, including terminating \0.
 * Returns 0 when an argument is NULL or the program is corrupted.
 */
int
regsub(prog, source, dest, copy, magic)
	regexp	*prog;
	char	*source;
	char	*dest;
	int		copy;
	int		magic;
{
	register char	*src;
	register char	*dst;
	register char	c;
	register int	no;
	register int	len;
#ifdef CASECONVERT
	fptr			func = (fptr)do_copy;
#endif

	if (prog == NULL || source == NULL || dest == NULL)
	{
		emsg(e_null);
		return 0;
	}
	if (UCHARAT(prog->program) != MAGIC)
	{
		emsg(e_re_corr);
		return 0;
	}
	src = source;
	dst = dest;

	while ((c = *src++) != '\0')
	{
		no = -1;
		if (c == '&' && magic)
			no = 0;
		/*
		 * Only look at what follows the backslash when there is something:
		 * strchr() also matches the terminating NUL, which would make the
		 * scan run past the end of "source".
		 */
		else if (c == '\\' && *src != '\0')
		{
			if (*src == '&' && !magic)
			{
				++src;
				no = 0;
			}
			else if ('0' <= *src && *src <= '9')
			{
				no = *src++ - '0';
			}
#ifdef CASECONVERT
			else if (strchr("uUlLeE", *src))
			{
				switch (*src++)
				{
				case 'u':	func = (fptr)do_upper;
							continue;
				case 'U':	func = (fptr)do_Upper;
							continue;
				case 'l':	func = (fptr)do_lower;
							continue;
				case 'L':	func = (fptr)do_Lower;
							continue;
				case 'e':
				case 'E':	func = (fptr)do_copy;
							continue;
				}
			}
#endif
		}
		if (no < 0)				/* Ordinary character. */
		{
			/*
			 * A backslash makes the next character literal.  A backslash at
			 * the end of the string is kept as it is; the NUL must not be
			 * consumed here.
			 */
			if (c == '\\' && *src != '\0')
				c = *src++;
			if (copy)
			{
#ifdef CASECONVERT
				/* "src - 1" points at the character that is now in "c" */
				func = (fptr)(func(dst, src - 1));
							/* Turbo C complains without the typecast */
# ifdef JP
				if (IsKanji(c))
				{
					/* func() stored two bytes, skip the second one */
					dst++;
					src++;
				}
# endif
#else
				*dst = c;
#endif
			}
			dst++;
		}
		else if (prog->startp[no] != NULL && prog->endp[no] != NULL)
		{
			len = (int)(prog->endp[no] - prog->startp[no]);
			if (copy)
			{
#ifdef CASECONVERT
				func = strnfcpy(func, dst, prog->startp[no], len);
#else
				(void) strncpy(dst, prog->startp[no], len);
#endif
			}
			dst += len;
			/*
			 * NOTE(review): strnfcpy() does not pad with NULs like strncpy()
			 * does, so with CASECONVERT this only detects a NUL in the last
			 * position of the matched text.
			 */
			if (copy && len != 0 && *(dst - 1) == '\0') {	/* strncpy hit NUL. */
				emsg(e_re_damg);
				goto exit;
			}
		}
	}
	if (copy)
		*dst = '\0';

exit:
	return (int)((dst - dest) + 1);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */