/* vi:ts=4:sw=4
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 *
 * This is NOT the original regular expression code as written by
 * Henry Spencer. This code has been modified specifically for use
 * with the VIM editor, and should not be used apart from compiling
 * VIM. If you want a good regular expression library, get the
 * original code. The copyright notice that follows is from the
 * original.
 *
 * Further this code is modified to work on Japanese letters.
 *
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 *
 * regsub
 *
 *		Copyright (c) 1986 by University of Toronto.
 *		Written by Henry Spencer.  Not derived from licensed software.
 *
 *		Permission is granted to anyone to use this software for any
 *		purpose on any computer system, and to redistribute it freely,
 *		subject to the following restrictions:
 *
 *		1. The author is not responsible for the consequences of use of
 *				this software, no matter how awful, even if they arise
 *				from defects in it.
 *
 *		2. The origin of this software must not be misrepresented, either
 *				by explicit claim or by omission.
 *
 *		3. Altered versions must be plainly marked as such, and must not
 *				be misrepresented as being the original software.
 *
 * $Log: regsub.c,v $
 * Revision 1.2 88/04/28 08:11:25 tony
 * First modification of the regexp library. Added an external variable
 * 'reg_ic' which can be set to indicate that case should be ignored.
 * Added a new parameter to regexec() to indicate that the given string
 * comes from the beginning of a line and is thus eligible to match
 * 'beginning-of-line'.
 *
 * Revisions by Olaf 'Rhialto' Seibert, rhialto@cs.kun.nl:
 * Changes for vi: (the semantics of several things were rather different)
 * - Added lexical analyzer, because in vi magicness of characters
 *   is rather difficult, and may change over time.
 * - Added support for \< \> \1-\9 and ~
 * - Left some magic stuff in, but only backslashed: \| \+
 * - * and \+ still work after \) even though they shouldn't.
 */

#include "vim.h"
#include "globals.h"
#include "proto.h"
#ifdef JP
#include "jp.h"
#include "ops.h"
#endif

#ifdef MSDOS
# define __ARGS(a)	a
#endif

/* Enables the \u \U \l \L \e \E case-conversion code below. */
#define CASECONVERT

#include <stdio.h>
#include "regexp.h"
#include "regmagic.h"

#ifdef LATTICE
# include <sys/types.h>		/* for size_t */
#endif

/*
 * UCHARAT(p): the character at p as a non-negative int, either by reading
 * it through an unsigned char pointer or by masking with CHARBITS.
 */
#ifndef CHARBITS
#define UCHARAT(p)		((int)*(unsigned char *)(p))
#else
#define UCHARAT(p)		((int)*(p)&CHARBITS)
#endif

/* The previous replacement string; defined in another file. */
extern char		*reg_prev_sub;

/* NOTE: this conditional is closed by the #endif that follows strnfcpy(). */
#ifdef CASECONVERT
/*
 * We should define fptr as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void. This should work for all compilers.
 *
 * Each do_xxx(d, s) function below stores the character at s into d and
 * returns the function that must be used for the NEXT character.  That is
 * how the one-shot conversions (do_upper, do_lower) differ from the
 * persistent ones (do_Upper, do_Lower).
 */
typedef void (*(*fptr) __ARGS((char *, char *)))();
static fptr strnfcpy __ARGS((fptr, char *, char *, int));

static fptr do_copy __ARGS((char *, char *));
static fptr do_upper __ARGS((char *, char *));
static fptr do_Upper __ARGS((char *, char *));
static fptr do_lower __ARGS((char *, char *));
static fptr do_Lower __ARGS((char *, char *));

/*
 * do_copy: copy one character from s to d unchanged.
 * With JP defined and IsKanji(*s) true, two bytes are copied.
 * Returns do_copy, so following characters are copied unchanged too.
 */
	static fptr
do_copy(d, s)
	char *d, *s;
{
#ifdef JP
	if (IsKanji(*s))
	{
		*d++ = *s++;
		*d = *s;
	}
	else
#endif
	*d = *s;
	return (fptr)do_copy;
}

/*
 * do_upper: store the TO_UPPER() form of the character at s into d.
 * With JP defined and IsKanji(*s) true, both bytes are copied and then
 * handed to jptocase(..., UPPER) (presumably an in-place case conversion of
 * the two-byte character -- defined elsewhere, confirm there).
 * Returns do_copy: only this single character is converted.
 */
	static fptr
do_upper(d, s)
	char *d, *s;
{
#ifdef JP
	if (IsKanji(*s))
	{
		*d = *s;
		*(d + 1) = *(s + 1);
		jptocase(d, d + 1, UPPER);
	}
	else
#endif
	*d = TO_UPPER(*s);
	return (fptr)do_copy;
}

/*
 * do_Upper: like do_upper(), but returns itself, so the conversion stays
 * in effect for the following characters until another function is chosen.
 */
	static fptr
do_Upper(d, s)
	char *d, *s;
{
#ifdef JP
	do_upper(d, s);		/* reuse the two-byte aware conversion */
#else
	*d = TO_UPPER(*s);
#endif
	return (fptr)do_Upper;
}

/*
 * do_lower: store the TO_LOWER() form of the character at s into d.
 * With JP defined and IsKanji(*s) true, both bytes are copied and then
 * handed to jptocase(..., LOWER) (presumably an in-place case conversion of
 * the two-byte character -- defined elsewhere, confirm there).
 * Returns do_copy: only this single character is converted.
 */
	static fptr
do_lower(d, s)
	char *d, *s;
{
#ifdef JP
	if (IsKanji(*s))
	{
		*d = *s;
		*(d + 1) = *(s + 1);
		jptocase(d, d + 1, LOWER);
	}
	else
#endif
	*d = TO_LOWER(*s);
	return (fptr)do_copy;
}

/*
 * do_Lower: like do_lower(), but returns itself, so the conversion stays
 * in effect for the following characters until another function is chosen.
 */
	static fptr
do_Lower(d, s)
	char *d, *s;
{
#ifdef JP
	do_lower(d, s);		/* reuse the two-byte aware conversion */
#else
	*d = TO_LOWER(*s);
#endif
	return (fptr)do_Lower;
}
static fptr strnfcpy(f, d, s, n) fptr f; char *d; char *s; int n; { while (n-- > 0) { #ifdef JP if (IsKanji(*s)) { f = (fptr)(f(d, s)); s += 2; d += 2; n--; continue; } #endif f = (fptr)(f(d, s)); /* Turbo C complains without the typecast */ if (!*s++) break; d++; } return f; } #endif /* * regtilde: replace tildes in the pattern by the old pattern * * Short explanation of the tilde: it stands for the previous replacement * pattern. If that previous pattern also contains a ~ we should go back * a step further... but we insert the previous pattern into the current one * and remember that. * This still does not handle the case where "magic" changes. TODO? * * New solution: The tilde's are parsed once before the first call to regsub(). * In the old solution (tilde handled in regsub()) is was possible to get an * endless loop. */ char * regtilde(source, magic) char *source; int magic; { char *newsub = NULL; char *tmpsub; char *p; int len; int prevlen; for (p = source; *p; ++p) { if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) { if (reg_prev_sub) { /* length = len(current) - 1 + len(previous) + 1 */ prevlen = strlen(reg_prev_sub); tmpsub = alloc((unsigned)(strlen(source) + prevlen)); if (tmpsub) { /* copy prefix */ len = (int)(p - source); /* not including ~ */ strncpy(tmpsub, source, (size_t)len); /* interpretate tilde */ strcpy(tmpsub + len, reg_prev_sub); /* copy postfix */ if (!magic) ++p; /* back off \ */ strcat(tmpsub + len, p + 1); free(newsub); newsub = tmpsub; p = newsub + len + prevlen; } } else if (magic) strcpy(p, p + 1); /* remove '~' */ else strcpy(p, p + 2); /* remove '\~' */ } else if (*p == '\\' && p[1]) /* skip escaped characters */ ++p; } free(reg_prev_sub); if (newsub) { source = newsub; reg_prev_sub = newsub; } else reg_prev_sub = strsave(source); return source; } /* - regsub - perform substitutions after a regexp match * * Returns the size of the replacement, including terminating \0. 
*/ int regsub(prog, source, dest, copy, magic) regexp *prog; char *source; char *dest; int copy; int magic; { register char *src; register char *dst; register char c; register int no; register int len; #ifdef CASECONVERT fptr func = (fptr)do_copy; #endif if (prog == NULL || source == NULL || dest == NULL) { emsg(e_null); return 0; } if (UCHARAT(prog->program) != MAGIC) { emsg(e_re_corr); return 0; } src = source; dst = dest; while ((c = *src++) != '\0') { no = -1; if (c == '&' && magic) no = 0; else if (c == '\\') { if (*src == '&' && !magic) { ++src; no = 0; } else if ('0' <= *src && *src <= '9') { no = *src++ - '0'; } #ifdef CASECONVERT else if (strchr("uUlLeE", *src)) { switch (*src++) { case 'u': func = (fptr)do_upper; continue; case 'U': func = (fptr)do_Upper; continue; case 'l': func = (fptr)do_lower; continue; case 'L': func = (fptr)do_Lower; continue; case 'e': case 'E': func = (fptr)do_copy; continue; } } #endif } if (no < 0) /* Ordinary character. */ { if (c == '\\') c = *src++; if (copy) { #ifdef CASECONVERT func = (fptr)(func(dst, src - 1)); /* Turbo C complains without the typecast */ # ifdef JP if (IsKanji(c)) { dst++; src++; } # endif #else *dst = c; #endif } dst++; } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { len = (int)(prog->endp[no] - prog->startp[no]); if (copy) { #ifdef CASECONVERT func = strnfcpy(func, dst, prog->startp[no], len); #else (void) strncpy(dst, prog->startp[no], len); #endif } dst += len; if (copy && len != 0 && *(dst - 1) == '\0') { /* strncpy hit NUL. */ emsg(e_re_damg); goto exit; } } } if (copy) *dst = '\0'; exit: return (int)((dst - dest) + 1); }