X-Git-Url: http://git.silc.fi/gitweb/?p=runtime.git;a=blobdiff_plain;f=lib%2Fsilcutil%2Fsilcregex.c;h=3c83ed783d693149003ffa7cdaa47564c834952e;hp=c11ea8ad726040278e3976e34efaa12e0a3655c2;hb=296e4d08e79816ef1474596e81857b9261500f22;hpb=96ae4adfa8757be98b0587941e26d0733e1fb22e diff --git a/lib/silcutil/silcregex.c b/lib/silcutil/silcregex.c index c11ea8ad..3c83ed78 100644 --- a/lib/silcutil/silcregex.c +++ b/lib/silcutil/silcregex.c @@ -1,40 +1,42 @@ -/* - - regexpr.c - - Author: Tatu Ylonen - - Copyright (c) 1991 Tatu Ylonen, Espoo, Finland - - Permission to use, copy, modify, distribute, and sell this software - and its documentation is hereby granted without fee, provided that the - above copyright notice appears in all source code copies, the name of - Tatu Ylonen is not used to advertise products containing this software - or a derivation thereof, and all modified versions are clearly marked - as such. - - This software is provided "as is" without express or implied warranty. - - Created: Thu Sep 26 17:14:05 1991 ylo - Last modified: Sun Mar 29 16:47:31 1992 ylo - - This code draws many ideas from the regular expression packages by - Henry Spencer of the University of Toronto and Richard Stallman of the - Free Software Foundation. - - Emacs-specific code and syntax table code is almost directly borrowed - from GNU regexp. - - The SILC Regex API by Pekka Riikonen, under the same license as the original - code. +/* regexpr.c + * + * Author: Tatu Ylonen + * + * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without + * fee, provided that the above copyright notice appear in all copies. + * This software is provided "as is" without express or implied + * warranty. + * + * Created: Thu Sep 26 17:14:05 1991 ylo + * Last modified: Mon Nov 4 17:06:48 1991 ylo + * Ported to Think C: 19 Jan 1992 guido@cwi.nl + * + * This code draws many ideas from the regular expression packages by + * Henry Spencer of the University of Toronto and Richard Stallman of + * the Free Software Foundation. + * + * Bugs fixed and lots of reorganization by Jeffrey C. Ollie, April + * 1997 Thanks for bug reports and ideas from Andrew Kuchling, Tim + * Peters, Guido van Rossum, Ka-Ping Yee, Sjoerd Mullender, and + * probably one or two others that I'm forgetting. + * + */ +/* + The SILC Regex API and modifications by Pekka Riikonen, under the same + license as the original code. We've added the following features: + + - RE_SYNTAX_POSIX POSIX extended regular expression syntax + - RE_REPEAT bounded repeat a{n,m} (RE_SYNTAX_POSIX) + - RE_NOTBOL bol fails to match (conforming POSIX regex API) + - RE_NOTEOL eol fails to match (conforming POSIX regex API) + - SilcStack support compile/match without real memory allocations */ -#include "silc.h" - -/* Modified for use in SILC Runtime Toolkit. I think we have disabled many - features we could use, for the sake of simple API, which we may want to - extend later. */ +#include "silcruntime.h" #define RE_NREGS 128 /* number of registers available */ @@ -47,11 +49,15 @@ #define RE_CONTEXT_INDEP_OPS 32 /* ^$?*+ are special in all contexts */ #define RE_ANSI_HEX 64 /* ansi sequences (\n etc) and \xhh */ #define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */ +#define RE_NOTBOL 256 /* bol fails to match */ +#define RE_NOTEOL 512 /* eol fails to match */ +#define RE_REPEAT 1024 /* bounded repeat expression */ /* definitions for some common regexp styles */ #define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS) #define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR) #define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR) +#define RE_SYNTAX_POSIX (RE_SYNTAX_AWK|RE_REPEAT) #define RE_SYNTAX_EMACS 0 /* Registers */ @@ -60,56 +66,347 @@ typedef struct re_registers { int end[RE_NREGS]; /* end offset of region */ } *regexp_registers_t; -int re_set_syntax(int syntax); -/* This sets the syntax to use and returns the previous syntax. The - syntax is specified by a bit mask of the above defined bits. */ - -SilcResult re_compile_pattern(char *regex, int regex_size, SilcRegex compiled); -/* This compiles the regexp (given in regex and length in regex_size). - This returns NULL if the regexp compiled successfully, and an error - message if an error was encountered. The buffer field must be - initialized to a memory area allocated by malloc (or to NULL) before - use, and the allocated field must be set to its length (or 0 if buffer is - NULL). Also, the translate field must be set to point to a valid - translation table, or NULL if it is not used. */ - -int re_match(SilcRegex compiled, char *string, int size, int pos, - regexp_registers_t regs); -/* This tries to match the regexp against the string. This returns the - length of the matched portion, or -1 if the pattern could not be - matched and -2 if an error (such as failure stack overflow) is - encountered. */ - -int re_match_2(SilcRegex compiled, char *string1, int size1, - char *string2, int size2, int pos, regexp_registers_t regs, - int mstop); -/* This tries to match the regexp to the concatenation of string1 and - string2. This returns the length of the matched portion, or -1 if the - pattern could not be matched and -2 if an error (such as failure stack - overflow) is encountered. */ - -int re_search(SilcRegex compiled, char *string, int size, int startpos, - int range, regexp_registers_t regs); -/* This rearches for a substring matching the regexp. This returns the first - index at which a match is found. range specifies at how many positions to - try matching; positive values indicate searching forwards, and negative - values indicate searching backwards. mstop specifies the offset beyond - which a match must not go. This returns -1 if no match is found, and - -2 if an error (such as failure stack overflow) is encountered. */ - -int re_search_2(SilcRegex compiled, char *string1, int size1, - char *string2, int size2, int startpos, int range, - regexp_registers_t regs, int mstop); -/* This is like re_search, but search from the concatenation of string1 and - string2. */ - -void re_compile_fastmap(SilcRegex compiled); -/* This computes the fastmap for the regexp. For this to have any effect, - the calling program must have initialized the fastmap field to point - to an array of 256 characters. */ - -#define MACRO_BEGIN do { -#define MACRO_END } while (0) +/* The original code blithely assumed that sizeof(short) == 2. Not + * always true. Original instances of "(short)x" were replaced by + * SHORT(x), where SHORT is #defined below. */ + +#define SHORT(x) ((x) & 0x8000 ? (x) - 0x10000 : (x)) + +/* The stack implementation is taken from an idea by Andrew Kuchling. + * It's a doubly linked list of arrays. The advantages of this over a + * simple linked list are that the number of mallocs required are + * reduced. It also makes it possible to statically allocate enough + * space so that small patterns don't ever need to call malloc. + * + * The advantages over a single array is that is periodically + * realloced when more space is needed is that we avoid ever copying + * the stack. */ + +/* item_t is the basic stack element. Defined as a union of + * structures so that both registers, failure points, and counters can + * be pushed/popped from the stack. There's nothing built into the + * item to keep track of whether a certain stack item is a register, a + * failure point, or a counter. */ + +typedef union item_t +{ + struct + { + int num; + int level; + unsigned char *start; + unsigned char *end; + } reg; + struct + { + int count; + int level; + int phantom; + unsigned char *code; + unsigned char *text; + } fail; + struct + { + int num; + int level; + int count; + } cntr; +} item_t; + +#define STACK_PAGE_SIZE 256 +#define NUM_REGISTERS 256 + +/* A 'page' of stack items. */ + +typedef struct item_page_t +{ + item_t items[STACK_PAGE_SIZE]; + struct item_page_t *prev; + struct item_page_t *next; +} item_page_t; + +typedef struct match_state +{ + /* The number of registers that have been pushed onto the stack + * since the last failure point. */ + + int count; + + /* Used to control when registers need to be pushed onto the + * stack. */ + + int level; + + /* The number of failure points on the stack. */ + + int point; + + /* Storage for the registers. Each register consists of two + * pointers to characters. So register N is represented as + * start[N] and end[N]. The pointers must be converted to + * offsets from the beginning of the string before returning the + * registers to the calling program. */ + + unsigned char *start[NUM_REGISTERS]; + unsigned char *end[NUM_REGISTERS]; + + /* Keeps track of whether a register has changed recently. */ + + int changed[NUM_REGISTERS]; + + /* Structure to encapsulate the stack. */ + struct + { + /* index into the current page. If index == 0 and you need + * to pop an item, move to the previous page and set index + * = STACK_PAGE_SIZE - 1. Otherwise decrement index to + * push a page. If index == STACK_PAGE_SIZE and you need + * to push a page move to the next page and set index = + * 0. If there is no new next page, allocate a new page + * and link it in. Otherwise, increment index to push a + * page. */ + + int index; + item_page_t *current; /* Pointer to the current page. */ + item_page_t first; /* First page is statically allocated. */ + } stack; +} match_state; + +/* Initialize a state object */ + +/* #define NEW_STATE(state) \ */ +/* memset(&state, 0, (void *)(&state.stack) - (void *)(&state)); \ */ +/* state.stack.current = &state.stack.first; \ */ +/* state.stack.first.prev = NULL; \ */ +/* state.stack.first.next = NULL; \ */ +/* state.stack.index = 0; \ */ +/* state.level = 1 */ + +#define NEW_STATE(state, nregs) \ + { \ + int i; \ + for (i = 0; i < nregs; i++) \ + { \ + state.start[i] = NULL; \ + state.end[i] = NULL; \ + state.changed[i] = 0; \ + } \ + state.stack.current = &state.stack.first; \ + state.stack.first.prev = NULL; \ + state.stack.first.next = NULL; \ + state.stack.index = 0; \ + state.level = 1; \ + state.count = 0; \ + state.level = 0; \ + state.point = 0; \ + } + +/* Free any memory that might have been malloc'd */ + +#define FREE_STATE(state) \ + while(state.stack.first.next != NULL) \ + { \ + state.stack.current = state.stack.first.next; \ + state.stack.first.next = state.stack.current->next; \ + silc_sfree(bufp->rstack, state.stack.current); \ + } + +/* Discard the top 'count' stack items. */ + +#define STACK_DISCARD(stack, count, on_error) \ + stack.index -= count; \ + while (stack.index < 0) \ + { \ + if (stack.current->prev == NULL) \ + on_error; \ + stack.current = stack.current->prev; \ + stack.index += STACK_PAGE_SIZE; \ + } + +/* Store a pointer to the previous item on the stack. Used to pop an + * item off of the stack. */ + +#define STACK_PREV(stack, top, on_error) \ + if (stack.index == 0) \ + { \ + if (stack.current->prev == NULL) \ + on_error; \ + stack.current = stack.current->prev; \ + stack.index = STACK_PAGE_SIZE - 1; \ + } \ + else \ + { \ + stack.index--; \ + } \ + top = &(stack.current->items[stack.index]) + +/* Store a pointer to the next item on the stack. Used to push an item + * on to the stack. */ + +#define STACK_NEXT(stack, top, on_error) \ + if (stack.index == STACK_PAGE_SIZE) \ + { \ + if (stack.current->next == NULL) \ + { \ + stack.current->next = \ + (item_page_t *)silc_smalloc(bufp->rstack, sizeof(item_page_t)); \ + if (stack.current->next == NULL) \ + on_error; \ + stack.current->next->prev = stack.current; \ + stack.current->next->next = NULL; \ + } \ + stack.current = stack.current->next; \ + stack.index = 0; \ + } \ + top = &(stack.current->items[stack.index++]) + +/* Store a pointer to the item that is 'count' items back in the + * stack. STACK_BACK(stack, top, 1, on_error) is equivalent to + * STACK_TOP(stack, top, on_error). */ + +#define STACK_BACK(stack, top, count, on_error) \ + { \ + int index; \ + item_page_t *current; \ + current = stack.current; \ + index = stack.index - (count); \ + while (index < 0) \ + { \ + if (current->prev == NULL) \ + on_error; \ + current = current->prev; \ + index += STACK_PAGE_SIZE; \ + } \ + top = &(current->items[index]); \ + } + +/* Store a pointer to the top item on the stack. Execute the + * 'on_error' code if there are no items on the stack. */ + +#define STACK_TOP(stack, top, on_error) \ + if (stack.index == 0) \ + { \ + if (stack.current->prev == NULL) \ + on_error; \ + top = &(stack.current->prev->items[STACK_PAGE_SIZE - 1]); \ + } \ + else \ + { \ + top = &(stack.current->items[stack.index - 1]); \ + } + +/* Test to see if the stack is empty */ + +#define STACK_EMPTY(stack) ((stack.index == 0) && \ + (stack.current->prev == NULL)) + +/* Return the start of register 'reg' */ + +#define GET_REG_START(state, reg) (state.start[reg]) + +/* Return the end of register 'reg' */ + +#define GET_REG_END(state, reg) (state.end[reg]) + +/* Set the start of register 'reg'. If the state of the register needs + * saving, push it on the stack. */ + +#define SET_REG_START(state, reg, text, on_error) \ + if(state.changed[reg] < state.level) \ + { \ + item_t *item; \ + STACK_NEXT(state.stack, item, on_error); \ + item->reg.num = reg; \ + item->reg.start = state.start[reg]; \ + item->reg.end = state.end[reg]; \ + item->reg.level = state.changed[reg]; \ + state.changed[reg] = state.level; \ + state.count++; \ + } \ + state.start[reg] = text + +/* Set the end of register 'reg'. If the state of the register needs + * saving, push it on the stack. */ + +#define SET_REG_END(state, reg, text, on_error) \ + if(state.changed[reg] < state.level) \ + { \ + item_t *item; \ + STACK_NEXT(state.stack, item, on_error); \ + item->reg.num = reg; \ + item->reg.start = state.start[reg]; \ + item->reg.end = state.end[reg]; \ + item->reg.level = state.changed[reg]; \ + state.changed[reg] = state.level; \ + state.count++; \ + } \ + state.end[reg] = text + +#define PUSH_FAILURE(state, xcode, xtext, on_error) \ + { \ + item_t *item; \ + STACK_NEXT(state.stack, item, on_error); \ + item->fail.code = xcode; \ + item->fail.text = xtext; \ + item->fail.count = state.count; \ + item->fail.level = state.level; \ + item->fail.phantom = 0; \ + state.count = 0; \ + state.level++; \ + state.point++; \ + } + +/* Update the last failure point with a new position in the text. */ + +#define UPDATE_FAILURE(state, xtext, on_error) \ + { \ + item_t *item; \ + STACK_BACK(state.stack, item, state.count + 1, on_error); \ + if (!item->fail.phantom) \ + { \ + item_t *item2; \ + STACK_NEXT(state.stack, item2, on_error); \ + item2->fail.code = item->fail.code; \ + item2->fail.text = xtext; \ + item2->fail.count = state.count; \ + item2->fail.level = state.level; \ + item2->fail.phantom = 1; \ + state.count = 0; \ + state.level++; \ + state.point++; \ + } \ + else \ + { \ + STACK_DISCARD(state.stack, state.count, on_error); \ + STACK_TOP(state.stack, item, on_error); \ + item->fail.text = xtext; \ + state.count = 0; \ + state.level++; \ + } \ + } + +#define POP_FAILURE(state, xcode, xtext, on_empty, on_error) \ + { \ + item_t *item; \ + do \ + { \ + while(state.count > 0) \ + { \ + STACK_PREV(state.stack, item, on_error); \ + state.start[item->reg.num] = item->reg.start; \ + state.end[item->reg.num] = item->reg.end; \ + state.changed[item->reg.num] = item->reg.level; \ + state.count--; \ + } \ + STACK_PREV(state.stack, item, on_empty); \ + xcode = item->fail.code; \ + xtext = item->fail.text; \ + state.count = item->fail.count; \ + state.level = item->fail.level; \ + state.point--; \ + } \ + while (item->fail.text == NULL); \ + } enum regexp_compiled_ops /* opcodes for compiled regexp */ { @@ -133,11 +430,9 @@ enum regexp_compiled_ops /* opcodes for compiled regexp */ Cwordend, /* match at end of word */ Cwordbound, /* match if at word boundary */ Cnotwordbound, /* match if not at word boundary */ -#ifdef emacs - Cemacs_at_dot, /* emacs only: matches at dot */ -#endif /* emacs */ Csyntaxspec, /* matches syntax code (1 byte follows) */ - Cnotsyntaxspec /* matches if syntax code does not match (1 byte foll)*/ + Cnotsyntaxspec, /* matches if syntax code does not match (1 byte foll)*/ + Crepeat1, }; enum regexp_syntax_op /* syntax codes for plain and quoted characters */ @@ -166,161 +461,151 @@ enum regexp_syntax_op /* syntax codes for plain and quoted characters */ Rwordend, /* end of word */ Rwordbound, /* word bound */ Rnotwordbound, /* not word bound */ -#ifdef emacs - Remacs_at_dot, /* emacs: at dot */ - Remacs_syntaxspec, /* syntaxspec */ - Remacs_notsyntaxspec, /* notsyntaxspec */ -#endif /* emacs */ - Rnum_ops + Rnum_ops, + Ropenrep, /* opening bounded repeat */ }; -static int re_compile_initialized = 0; -static int regexp_syntax = 0; -static unsigned char regexp_plain_ops[256]; -static unsigned char regexp_quoted_ops[256]; -static unsigned char regexp_precedences[Rnum_ops]; -static int regexp_context_indep_ops; -static int regexp_ansi_sequences; +#define Sword 1 +#define Swhitespace 2 +#define Sdigit 4 +#define Soctaldigit 8 +#define Shexdigit 16 #define NUM_LEVELS 5 /* number of precedence levels in use */ #define MAX_NESTING 100 /* max nesting level of operators */ - -#ifdef emacs - -/* This code is for emacs compatibility only. */ - -#include "config.h" -#include "lisp.h" -#include "buffer.h" -#include "syntax.h" - -/* emacs defines NULL in some strange way? */ -#undef NULL -#define NULL 0 - -#else /* emacs */ - -#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)] -#define Sword 1 - -#ifdef SYNTAX_TABLE -char *re_syntax_table; -#else -static char re_syntax_table[256]; -#endif /* SYNTAX_TABLE */ - -#endif /* emacs */ - -static void re_compile_initialize() +#define SYNTAX(ch) silc_re_syntax_table[(unsigned char)(ch)] + +static int silc_regexp_syntax = RE_SYNTAX_POSIX; +static int silc_regexp_context_indep_ops; +static int silc_regexp_ansi_sequences; +static int silc_re_compile_initialized = 0; +static unsigned char silc_re_syntax_table[256]; +static unsigned char silc_regexp_plain_ops[256]; +static unsigned char silc_regexp_quoted_ops[256]; +static unsigned char silc_regexp_precedencess[Rnum_ops]; + +void silc_re_compile_initialize(void) { int a; -#if !defined(emacs) && !defined(SYNTAX_TABLE) static int syntax_table_inited = 0; if (!syntax_table_inited) { syntax_table_inited = 1; - memset(re_syntax_table, 0, 256); + memset(silc_re_syntax_table, 0, 256); for (a = 'a'; a <= 'z'; a++) - re_syntax_table[a] = Sword; + silc_re_syntax_table[a] = Sword; for (a = 'A'; a <= 'Z'; a++) - re_syntax_table[a] = Sword; + silc_re_syntax_table[a] = Sword; for (a = '0'; a <= '9'; a++) - re_syntax_table[a] = Sword; + silc_re_syntax_table[a] = Sword | Sdigit | Shexdigit; + for (a = '0'; a <= '7'; a++) + silc_re_syntax_table[a] |= Soctaldigit; + for (a = 'A'; a <= 'F'; a++) + silc_re_syntax_table[a] |= Shexdigit; + for (a = 'a'; a <= 'f'; a++) + silc_re_syntax_table[a] |= Shexdigit; + silc_re_syntax_table['_'] = Sword; + for (a = 9; a <= 13; a++) + silc_re_syntax_table[a] = Swhitespace; + silc_re_syntax_table[' '] = Swhitespace; } -#endif /* !emacs && !SYNTAX_TABLE */ - re_compile_initialized = 1; + silc_re_compile_initialized = 1; for (a = 0; a < 256; a++) { - regexp_plain_ops[a] = Rnormal; - regexp_quoted_ops[a] = Rnormal; + silc_regexp_plain_ops[a] = Rnormal; + silc_regexp_quoted_ops[a] = Rnormal; } for (a = '0'; a <= '9'; a++) - regexp_quoted_ops[a] = Rmemory; - regexp_plain_ops['\134'] = Rquote; - if (regexp_syntax & RE_NO_BK_PARENS) + silc_regexp_quoted_ops[a] = Rmemory; + silc_regexp_plain_ops['\134'] = Rquote; + if (silc_regexp_syntax & RE_NO_BK_PARENS) { - regexp_plain_ops['('] = Ropenpar; - regexp_plain_ops[')'] = Rclosepar; + silc_regexp_plain_ops['('] = Ropenpar; + silc_regexp_plain_ops[')'] = Rclosepar; } else { - regexp_quoted_ops['('] = Ropenpar; - regexp_quoted_ops[')'] = Rclosepar; + silc_regexp_quoted_ops['('] = Ropenpar; + silc_regexp_quoted_ops[')'] = Rclosepar; } - if (regexp_syntax & RE_NO_BK_VBAR) - regexp_plain_ops['\174'] = Ror; + if (silc_regexp_syntax & RE_NO_BK_VBAR) + silc_regexp_plain_ops['\174'] = Ror; else - regexp_quoted_ops['\174'] = Ror; - regexp_plain_ops['*'] = Rstar; - if (regexp_syntax & RE_BK_PLUS_QM) + silc_regexp_quoted_ops['\174'] = Ror; + silc_regexp_plain_ops['*'] = Rstar; + if (silc_regexp_syntax & RE_BK_PLUS_QM) { - regexp_quoted_ops['+'] = Rplus; - regexp_quoted_ops['?'] = Roptional; + silc_regexp_quoted_ops['+'] = Rplus; + silc_regexp_quoted_ops['?'] = Roptional; } else { - regexp_plain_ops['+'] = Rplus; - regexp_plain_ops['?'] = Roptional; + silc_regexp_plain_ops['+'] = Rplus; + silc_regexp_plain_ops['?'] = Roptional; } - if (regexp_syntax & RE_NEWLINE_OR) - regexp_plain_ops['\n'] = Ror; - regexp_plain_ops['\133'] = Ropenset; - regexp_plain_ops['\136'] = Rbol; - regexp_plain_ops['$'] = Reol; - regexp_plain_ops['.'] = Ranychar; - if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS)) + if (silc_regexp_syntax & RE_NEWLINE_OR) + silc_regexp_plain_ops['\n'] = Ror; + silc_regexp_plain_ops['\133'] = Ropenset; + silc_regexp_plain_ops['\136'] = Rbol; + silc_regexp_plain_ops['$'] = Reol; + silc_regexp_plain_ops['.'] = Ranychar; + if (!(silc_regexp_syntax & RE_NO_GNU_EXTENSIONS)) { -#ifdef emacs - regexp_quoted_ops['='] = Remacs_at_dot; - regexp_quoted_ops['s'] = Remacs_syntaxspec; - regexp_quoted_ops['S'] = Remacs_notsyntaxspec; -#endif /* emacs */ - regexp_quoted_ops['w'] = Rwordchar; - regexp_quoted_ops['W'] = Rnotwordchar; - regexp_quoted_ops['<'] = Rwordbeg; - regexp_quoted_ops['>'] = Rwordend; - regexp_quoted_ops['b'] = Rwordbound; - regexp_quoted_ops['B'] = Rnotwordbound; - regexp_quoted_ops['`'] = Rbegbuf; - regexp_quoted_ops['\''] = Rendbuf; + silc_regexp_quoted_ops['w'] = Rwordchar; + silc_regexp_quoted_ops['W'] = Rnotwordchar; + silc_regexp_quoted_ops['<'] = Rwordbeg; + silc_regexp_quoted_ops['>'] = Rwordend; + silc_regexp_quoted_ops['b'] = Rwordbound; + silc_regexp_quoted_ops['B'] = Rnotwordbound; + silc_regexp_quoted_ops['`'] = Rbegbuf; + silc_regexp_quoted_ops['\''] = Rendbuf; } - if (regexp_syntax & RE_ANSI_HEX) - regexp_quoted_ops['v'] = Rextended_memory; + if (silc_regexp_syntax & RE_ANSI_HEX) + silc_regexp_quoted_ops['v'] = Rextended_memory; for (a = 0; a < Rnum_ops; a++) - regexp_precedences[a] = 4; - if (regexp_syntax & RE_TIGHT_VBAR) + silc_regexp_precedencess[a] = 4; + if (silc_regexp_syntax & RE_TIGHT_VBAR) { - regexp_precedences[Ror] = 3; - regexp_precedences[Rbol] = 2; - regexp_precedences[Reol] = 2; + silc_regexp_precedencess[Ror] = 3; + silc_regexp_precedencess[Rbol] = 2; + silc_regexp_precedencess[Reol] = 2; } else { - regexp_precedences[Ror] = 2; - regexp_precedences[Rbol] = 3; - regexp_precedences[Reol] = 3; + silc_regexp_precedencess[Ror] = 2; + silc_regexp_precedencess[Rbol] = 3; + silc_regexp_precedencess[Reol] = 3; + } + if (silc_regexp_syntax & RE_REPEAT) + { + if (silc_regexp_syntax & RE_NO_BK_PARENS) + { + silc_regexp_plain_ops['{'] = Ropenrep; + } + else + { + silc_regexp_quoted_ops['{'] = Ropenrep; + } } - regexp_precedences[Rclosepar] = 1; - regexp_precedences[Rend] = 0; - regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0; - regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0; + silc_regexp_precedencess[Rclosepar] = 1; + silc_regexp_precedencess[Rend] = 0; + silc_regexp_context_indep_ops = (silc_regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0; + silc_regexp_ansi_sequences = (silc_regexp_syntax & RE_ANSI_HEX) != 0; } -int re_set_syntax(syntax) -int syntax; +int silc_re_set_syntax(int syntax) { int ret; - ret = regexp_syntax; - regexp_syntax = syntax; - re_compile_initialize(); + ret = silc_regexp_syntax; + silc_regexp_syntax = syntax; + silc_re_compile_initialize(); return ret; } -static int hex_char_to_decimal(ch) -int ch; +static int silc_hex_char_to_decimal(int ch) { if (ch >= '0' && ch <= '9') return ch - '0'; @@ -331,348 +616,845 @@ int ch; return 16; } -SilcResult re_compile_pattern(regex, size, bufp) -char *regex; -int size; -SilcRegex bufp; +static int silc_re_compile_fastmap_aux(unsigned char *code, int pos, + unsigned char *visited, + unsigned char *can_be_null, + unsigned char *fastmap) { - int a, pos, op, current_level, level, opcode; - int pattern_offset = 0, alloc; - int starts[NUM_LEVELS * MAX_NESTING], starts_base; - int future_jumps[MAX_NESTING], num_jumps; - unsigned char ch = 0; - char *pattern, *translate; - int next_register, paren_depth, num_open_registers, open_registers[RE_NREGS]; - int beginning_context; - -#define NEXTCHAR(var) \ - MACRO_BEGIN \ - if (pos >= size) \ - goto ends_prematurely; \ - (var) = regex[pos]; \ - pos++; \ - MACRO_END - -#define ALLOC(amount) \ - MACRO_BEGIN \ - if (pattern_offset+(amount) > alloc) \ - { \ - alloc += 256 + (amount); \ - pattern = silc_realloc(pattern, alloc); \ - if (!pattern) \ - goto out_of_memory; \ - } \ - MACRO_END + int a; + int b; + int syntaxcode; -#define STORE(ch) pattern[pattern_offset++] = (ch) + if (visited[pos]) + return 0; /* we have already been here */ + visited[pos] = 1; + for (;;) + switch (code[pos++]) { + case Cend: + { + *can_be_null = 1; + return 0; + } + case Cbol: + case Cbegbuf: + case Cendbuf: + case Cwordbeg: + case Cwordend: + case Cwordbound: + case Cnotwordbound: + { + for (a = 0; a < 256; a++) + fastmap[a] = 1; + break; + } + case Csyntaxspec: + { + syntaxcode = code[pos++]; + for (a = 0; a < 256; a++) + if (SYNTAX(a) & syntaxcode) + fastmap[a] = 1; + return 0; + } + case Cnotsyntaxspec: + { + syntaxcode = code[pos++]; + for (a = 0; a < 256; a++) + if (!(SYNTAX(a) & syntaxcode) ) + fastmap[a] = 1; + return 0; + } + case Ceol: + { + fastmap['\n'] = 1; + if (*can_be_null == 0) + *can_be_null = 2; /* can match null, but only at end of buffer*/ + return 0; + } + case Cset: + { + for (a = 0; a < 256/8; a++) + if (code[pos + a] != 0) + for (b = 0; b < 8; b++) + if (code[pos + a] & (1 << b)) + fastmap[(a << 3) + b] = 1; + pos += 256/8; + return 0; + } + case Cexact: + { + fastmap[(unsigned char)code[pos]] = 1; + return 0; + } + case Canychar: + { + for (a = 0; a < 256; a++) + if (a != '\n') + fastmap[a] = 1; + return 0; + } + case Cstart_memory: + case Cend_memory: + { + pos++; + break; + } + case Cmatch_memory: + { + for (a = 0; a < 256; a++) + fastmap[a] = 1; + *can_be_null = 1; + return 0; + } + case Cjump: + case Cdummy_failure_jump: + case Cupdate_failure_jump: + case Cstar_jump: + { + a = (unsigned char)code[pos++]; + a |= (unsigned char)code[pos++] << 8; + pos += (int)SHORT(a); + if (visited[pos]) + { + /* argh... the regexp contains empty loops. This is not + good, as this may cause a failure stack overflow when + matching. Oh well. */ + /* this path leads nowhere; pursue other paths. */ + return 0; + } + visited[pos] = 1; + break; + } + case Cfailure_jump: + { + a = (unsigned char)code[pos++]; + a |= (unsigned char)code[pos++] << 8; + a = pos + (int)SHORT(a); + return silc_re_compile_fastmap_aux(code, a, visited, + can_be_null, fastmap); + } + case Crepeat1: + { + pos += 2; + break; + } + default: + { + silc_set_errno(SILC_ERR_REGEX_OPCODE); + return -1; + /*NOTREACHED*/ + } + } +} -#define CURRENT_LEVEL_START (starts[starts_base + current_level]) +static int silc_re_do_compile_fastmap(unsigned char *buffer, int used, int pos, + unsigned char *can_be_null, + unsigned char *fastmap, SilcRegex bufp) +{ + unsigned char small_visited[512], *visited; + int ret; -#define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset + if (used <= sizeof(small_visited)) + visited = small_visited; + else + { + silc_stack_push(bufp->rstack, NULL); + visited = silc_smalloc(bufp->rstack, used); + if (!visited) { + silc_stack_pop(bufp->rstack); + return 0; + } + } + *can_be_null = 0; + memset(fastmap, 0, 256); + memset(visited, 0, used); + ret = silc_re_compile_fastmap_aux(buffer, pos, visited, + can_be_null, fastmap); + if (visited != small_visited) { + silc_sfree(bufp->rstack, visited); + silc_stack_pop(bufp->rstack); + } + return ret == 0; +} -#define PUSH_LEVEL_STARTS if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \ - starts_base += NUM_LEVELS; \ - else \ - goto too_complex +int silc_re_compile_fastmap(SilcRegex bufp) +{ + if (!bufp->fastmap || bufp->fastmap_accurate) + return 0; + SILC_ASSERT(bufp->used > 0); + if (!silc_re_do_compile_fastmap(bufp->buffer, + bufp->used, + 0, + &bufp->can_be_null, + bufp->fastmap, bufp)) + return -1; + if (bufp->buffer[0] == Cbol) + bufp->anchor = 1; /* begline */ + else { + if (bufp->buffer[0] == Cbegbuf) + bufp->anchor = 2; /* begbuf */ + else + bufp->anchor = 0; /* none */ + } + bufp->fastmap_accurate = 1; + return 0; +} -#define POP_LEVEL_STARTS starts_base -= NUM_LEVELS +/* + * star is coded as: + * 1: failure_jump 2 + * ... code for operand of star + * star_jump 1 + * 2: ... code after star + * + * We change the star_jump to update_failure_jump if we can determine + * that it is safe to do so; otherwise we change it to an ordinary + * jump. + * + * plus is coded as + * + * jump 2 + * 1: failure_jump 3 + * 2: ... code for operand of plus + * star_jump 1 + * 3: ... code after plus + * + * For star_jump considerations this is processed identically to star. + * + */ + +static int silc_re_optimize_star_jump(SilcRegex bufp, unsigned char *code) +{ + unsigned char map[256]; + unsigned char can_be_null; + unsigned char *p1; + unsigned char *p2; + unsigned char ch; + int a; + int b; + int num_instructions = 0; -#define PUT_ADDR(offset,addr) \ - MACRO_BEGIN \ - int disp = (addr) - (offset) - 2; \ - pattern[(offset)] = disp & 0xff; \ - pattern[(offset)+1] = (disp>>8) & 0xff; \ - MACRO_END - -#define INSERT_JUMP(pos,type,addr) \ - MACRO_BEGIN \ - int a, p = (pos), t = (type), ad = (addr); \ - for (a = pattern_offset - 1; a >= p; a--) \ - pattern[a + 3] = pattern[a]; \ - pattern[p] = t; \ - PUT_ADDR(p+1,ad); \ - pattern_offset += 3; \ - MACRO_END + a = (unsigned char)*code++; + a |= (unsigned char)*code++ << 8; + a = (int)SHORT(a); -#define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7)) + p1 = code + a + 3; /* skip the failure_jump */ + /* Check that the jump is within the pattern */ + if (p1buffer || bufp->buffer+bufp->usedallocated = alloc; \ - bufp->buffer = pattern; \ - bufp->used = pattern_offset; \ - MACRO_END - -#define GETHEX(var) \ - MACRO_BEGIN \ - char gethex_ch, gethex_value; \ - NEXTCHAR(gethex_ch); \ - gethex_value = hex_char_to_decimal(gethex_ch); \ - if (gethex_value == 16) \ - goto hex_error; \ - NEXTCHAR(gethex_ch); \ - gethex_ch = hex_char_to_decimal(gethex_ch); \ - if (gethex_ch == 16) \ - goto hex_error; \ - (var) = gethex_value * 16 + gethex_ch; \ - MACRO_END - -#define ANSI_TRANSLATE(ch) \ - MACRO_BEGIN \ - switch (ch) \ - { \ - case 'a': \ - case 'A': \ - ch = 7; /* audible bell */ \ - break; \ - case 'b': \ - case 'B': \ - ch = 8; /* backspace */ \ - break; \ - case 'f': \ - case 'F': \ - ch = 12; /* form feed */ \ - break; \ - case 'n': \ - case 'N': \ - ch = 10; /* line feed */ \ - break; \ - case 'r': \ - case 'R': \ - ch = 13; /* carriage return */ \ - break; \ - case 't': \ - case 'T': \ - ch = 9; /* tab */ \ - break; \ - case 'v': \ - case 'V': \ - ch = 11; /* vertical tab */ \ - break; \ - case 'x': /* hex code */ \ - case 'X': \ - GETHEX(ch); \ - break; \ - default: \ - /* other characters passed through */ \ - if (translate) \ - ch = translate[(unsigned char)ch]; \ - break; \ - } \ - MACRO_END - - if (!re_compile_initialized) - re_compile_initialize(); - bufp->used = 0; - bufp->fastmap_accurate = 0; - bufp->uses_registers = 0; - translate = bufp->translate; - pattern = bufp->buffer; - alloc = bufp->allocated; - if (alloc == 0 || pattern == NULL) + SILC_ASSERT(p1[-3] == Cfailure_jump); + p2 = code; + /* p1 points inside loop, p2 points to after loop */ + if (!silc_re_do_compile_fastmap(bufp->buffer, bufp->used, + (int)(p2 - bufp->buffer), + &can_be_null, map, bufp)) + goto make_normal_jump; + + /* If we might introduce a new update point inside the + * loop, we can't optimize because then update_jump would + * update a wrong failure point. Thus we have to be + * quite careful here. + */ + + /* loop until we find something that consumes a character */ + loop_p1: + num_instructions++; + switch (*p1++) { - alloc = 256; - pattern = silc_malloc(alloc); - if (!pattern) - goto out_of_memory; + case Cbol: + case Ceol: + case Cbegbuf: + case Cendbuf: + case Cwordbeg: + case Cwordend: + case Cwordbound: + case Cnotwordbound: + { + goto loop_p1; + } + case Cstart_memory: + case Cend_memory: + { + p1++; + goto loop_p1; + } + case Cexact: + { + ch = (unsigned char)*p1++; + if (map[(int)ch]) + goto make_normal_jump; + break; + } + case Canychar: + { + for (b = 0; b < 256; b++) + if (b != '\n' && map[b]) + goto make_normal_jump; + break; + } + case Cset: + { + for (b = 0; b < 256; b++) + if ((p1[b >> 3] & (1 << (b & 7))) && map[b]) + goto make_normal_jump; + p1 += 256/8; + break; + } + default: + { + goto make_normal_jump; + } } - pattern_offset = 0; - starts_base = 0; - num_jumps = 0; - current_level = 0; - SET_LEVEL_START; - num_open_registers = 0; - next_register = 1; - paren_depth = 0; - beginning_context = 1; - op = -1; - /* we use Rend dummy to ensure that pending jumps are updated (due to - low priority of Rend) before exiting the loop. */ - pos = 0; - while (op != Rend) + /* now we know that we can't backtrack. */ + while (p1 != p2 - 3) { - if (pos >= size) - op = Rend; - else + num_instructions++; + switch (*p1++) { - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - op = regexp_plain_ops[(unsigned char)ch]; - if (op == Rquote) - { - NEXTCHAR(ch); - op = regexp_quoted_ops[(unsigned char)ch]; - if (op == Rnormal && regexp_ansi_sequences) - ANSI_TRANSLATE(ch); - } - } - level = regexp_precedences[op]; - /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n", - ch, op, level, current_level, CURRENT_LEVEL_START); */ - if (level > current_level) - { - for (current_level++; current_level < level; current_level++) - SET_LEVEL_START; - SET_LEVEL_START; - } - else + case Cend: + { + return 0; + } + case Cbol: + case Ceol: + case Canychar: + case Cbegbuf: + case Cendbuf: + case Cwordbeg: + case Cwordend: + case Cwordbound: + case Cnotwordbound: + { + break; + } + case Cset: + { + p1 += 256/8; + break; + } + case Cexact: + case Cstart_memory: + case Cend_memory: + case Cmatch_memory: + case Csyntaxspec: + case Cnotsyntaxspec: + { + p1++; + break; + } + case Cjump: + case Cstar_jump: + case Cfailure_jump: + case Cupdate_failure_jump: + case Cdummy_failure_jump: + { + goto make_normal_jump; + } + default: + { + return 0; + } + } + } + + /* make_update_jump: */ + code -= 3; + a += 3; /* jump to after the Cfailure_jump */ + code[0] = Cupdate_failure_jump; + code[1] = a & 0xff; + code[2] = a >> 8; + if (num_instructions > 1) + return 1; + SILC_ASSERT(num_instructions == 1); + /* if the only instruction matches a single character, we can do + * better */ + p1 = code + 3 + a; /* start of sole instruction */ + if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar || + *p1 == Csyntaxspec || *p1 == Cnotsyntaxspec) + code[0] = Crepeat1; + return 1; + + make_normal_jump: + code -= 3; + *code = Cjump; + return 1; +} + +static int silc_re_optimize(SilcRegex bufp) +{ + unsigned char *code; + + code = bufp->buffer; + + while(1) + { + switch (*code++) + { + case Cend: + { + return 1; + } + case Canychar: + case Cbol: + case Ceol: + case Cbegbuf: + case Cendbuf: + case Cwordbeg: + case Cwordend: + case Cwordbound: + case Cnotwordbound: + { + break; + } + case Cset: + { + code += 256/8; + break; + } + case Cexact: + case Cstart_memory: + case Cend_memory: + case Cmatch_memory: + case Csyntaxspec: + case Cnotsyntaxspec: + { + code++; + break; + } + case Cstar_jump: + { + if (!silc_re_optimize_star_jump(bufp, code)) + { + return 0; + } + /* fall through */ + } + case Cupdate_failure_jump: + case Cjump: + case Cdummy_failure_jump: + case Cfailure_jump: + case Crepeat1: + { + code += 2; + break; + } + default: + { + return 0; + } + } + } +} + +#define NEXTCHAR(var) \ + { \ + if (pos >= size) \ + goto ends_prematurely; \ + (var) = regex[pos]; \ + pos++; \ + } + +#define ALLOC(amount) \ + { \ + if (pattern_offset+(amount) > alloc) \ + { \ + pattern = silc_srealloc(bufp->rstack, alloc, pattern, \ + alloc + 256 + (amount)); \ + alloc += 256 + (amount); \ + if (!pattern) \ + goto out_of_memory; \ + } \ + } + +#define STORE(ch) pattern[pattern_offset++] = (ch) + +#define CURRENT_LEVEL_START (starts[starts_base + current_level]) + +#define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset + +#define PUSH_LEVEL_STARTS \ + if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \ + starts_base += NUM_LEVELS; \ + else \ + goto too_complex \ + +#define POP_LEVEL_STARTS starts_base -= NUM_LEVELS + +#define PUT_ADDR(offset,addr) \ + { \ + int disp = (addr) - (offset) - 2; \ + pattern[(offset)] = disp & 0xff; \ + pattern[(offset)+1] = (disp>>8) & 0xff; \ + } + +#define INSERT_JUMP(pos,type,addr) \ + { \ + int a, p = (pos), t = (type), ad = (addr); \ + for (a = pattern_offset - 1; a >= p; a--) \ + pattern[a + 3] = pattern[a]; \ + pattern[p] = t; \ + PUT_ADDR(p+1,ad); \ + pattern_offset += 3; \ + } + +#define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7)) + +#define SET_FIELDS \ + { \ + bufp->allocated = alloc; \ + bufp->buffer = pattern; \ + bufp->used = pattern_offset; \ + } + +#define GETHEX(var) \ + { \ + unsigned char gethex_ch, gethex_value; \ + NEXTCHAR(gethex_ch); \ + gethex_value = silc_hex_char_to_decimal(gethex_ch); \ + if (gethex_value == 16) \ + goto hex_error; \ + NEXTCHAR(gethex_ch); \ + gethex_ch = silc_hex_char_to_decimal(gethex_ch); \ + if (gethex_ch == 16) \ + goto hex_error; \ + (var) = gethex_value * 16 + gethex_ch; \ + } + +#define ANSI_TRANSLATE(ch) \ + { \ + switch (ch) \ + { \ + case 'a': \ + case 'A': \ + { \ + ch = 7; /* audible bell */ \ + break; \ + } \ + case 'b': \ + case 'B': \ + { \ + ch = 8; /* backspace */ \ + break; \ + } \ + case 'f': \ + case 'F': \ + { \ + ch = 12; /* form feed */ \ + break; \ + } \ + case 'n': \ + case 'N': \ + { \ + ch = 10; /* line feed */ \ + break; \ + } \ + case 'r': \ + case 'R': \ + { \ + ch = 13; /* carriage return */ \ + break; \ + } \ + case 't': \ + case 'T': \ + { \ + ch = 9; /* tab */ \ + break; \ + } \ + case 'v': \ + case 'V': \ + { \ + ch = 11; /* vertical tab */ \ + break; \ + } \ + case 'x': /* hex code */ \ + case 'X': \ + { \ + GETHEX(ch); \ + break; \ + } \ + default: \ + { \ + /* other characters passed through */ \ + if (translate) \ + ch = translate[(unsigned char)ch]; \ + break; \ + } \ + } \ + } + +SilcResult silc_re_compile_pattern(unsigned char *regex, int size, + SilcRegex bufp) +{ + int a; + int pos; + int op; + int current_level; + int level; + int opcode = 0; + int pattern_offset = 0, alloc; + int starts[NUM_LEVELS * MAX_NESTING]; + int starts_base; + int future_jumps[MAX_NESTING]; + int num_jumps; + unsigned char ch = '\0'; + unsigned char *pattern; + unsigned char *translate; + int next_register; + int paren_depth; + int num_open_registers; + int open_registers[RE_NREGS]; + int beginning_context; + + if (!silc_re_compile_initialized) + silc_re_compile_initialize(); + bufp->used = 0; + bufp->fastmap_accurate = 0; + bufp->uses_registers = 1; + bufp->num_registers = 1; + translate = bufp->translate; + pattern = bufp->buffer; + alloc = bufp->allocated; + if (alloc == 0 || pattern == NULL) + { + alloc = 256; + pattern = silc_smalloc(bufp->rstack, alloc); + if (!pattern) + goto out_of_memory; + } + pattern_offset = 0; + starts_base = 0; + num_jumps = 0; + current_level = 0; + SET_LEVEL_START; + num_open_registers = 0; + next_register = 1; + paren_depth = 0; + beginning_context = 1; + op = -1; + /* we use Rend dummy to ensure that pending jumps are updated + (due to low priority of Rend) before exiting the loop. */ + pos = 0; + while (op != Rend) + { + if (pos >= size) + op = Rend; + else + { + NEXTCHAR(ch); + if (translate) + ch = translate[(unsigned char)ch]; + op = silc_regexp_plain_ops[(unsigned char)ch]; + if (op == Rquote) + { + NEXTCHAR(ch); + op = silc_regexp_quoted_ops[(unsigned char)ch]; + if (op == Rnormal && silc_regexp_ansi_sequences) + ANSI_TRANSLATE(ch); + } + } + level = silc_regexp_precedencess[op]; + /* printf("ch='%c' op=%d level=%d current_level=%d + curlevstart=%d\n", ch, op, level, current_level, + CURRENT_LEVEL_START); */ + if (level > current_level) + { + for (current_level++; current_level < level; current_level++) + SET_LEVEL_START; + SET_LEVEL_START; + } + else if (level < current_level) { current_level = level; for (;num_jumps > 0 && - future_jumps[num_jumps-1] >= CURRENT_LEVEL_START; + future_jumps[num_jumps-1] >= CURRENT_LEVEL_START; num_jumps--) PUT_ADDR(future_jumps[num_jumps-1], pattern_offset); } switch (op) { case Rend: - break; + { + break; + } case Rnormal: - normal_char: - opcode = Cexact; - store_opcode_and_arg: /* opcode & ch must be set */ - SET_LEVEL_START; - ALLOC(2); - STORE(opcode); - STORE(ch); - break; + { + normal_char: + opcode = Cexact; + store_opcode_and_arg: /* opcode & ch must be set */ + SET_LEVEL_START; + ALLOC(2); + STORE(opcode); + STORE(ch); + break; + } case Ranychar: - opcode = Canychar; - store_opcode: - SET_LEVEL_START; - ALLOC(1); - STORE(opcode); - break; + { + opcode = Canychar; + store_opcode: + SET_LEVEL_START; + ALLOC(1); + STORE(opcode); + break; + } case Rquote: - abort(); - /*NOTREACHED*/ + { + SILC_VERIFY(op != Rquote); + /*NOTREACHED*/ + } case Rbol: - if (!beginning_context) { - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; + { + if (!beginning_context) { + if (silc_regexp_context_indep_ops) + goto op_error; + else + goto normal_char; + } + opcode = Cbol; + goto store_opcode; } - opcode = Cbol; - goto store_opcode; case Reol: - if (!((pos >= size) || - ((regexp_syntax & RE_NO_BK_VBAR) ? - (regex[pos] == '\174') : - (pos+1 < size && regex[pos] == '\134' && - regex[pos+1] == '\174')) || - ((regexp_syntax & RE_NO_BK_PARENS)? - (regex[pos] == ')'): - (pos+1 < size && regex[pos] == '\134' && - regex[pos+1] == ')')))) { - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; + { + if (!((pos >= size) || + ((silc_regexp_syntax & RE_NO_BK_VBAR) ? + (regex[pos] == '\174') : + (pos+1 < size && regex[pos] == '\134' && + regex[pos+1] == '\174')) || + ((silc_regexp_syntax & RE_NO_BK_PARENS)? + (regex[pos] == ')'): + (pos+1 < size && regex[pos] == '\134' && + regex[pos+1] == ')')))) { + if (silc_regexp_context_indep_ops) + goto op_error; + else + goto normal_char; + } + opcode = Ceol; + goto store_opcode; + /* NOTREACHED */ + break; } - opcode = Ceol; - goto store_opcode; - break; case Roptional: - if (beginning_context) { - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; + { + if (beginning_context) { + if (silc_regexp_context_indep_ops) + goto op_error; + else + goto normal_char; + } + if (CURRENT_LEVEL_START == pattern_offset) + break; /* ignore empty patterns for ? */ + ALLOC(3); + INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, + pattern_offset + 3); + break; } - if (CURRENT_LEVEL_START == pattern_offset) - break; /* ignore empty patterns for ? */ - ALLOC(3); - INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, - pattern_offset + 3); - break; case Rstar: case Rplus: - if (beginning_context) { - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; + { + if (beginning_context) { + if (silc_regexp_context_indep_ops) + goto op_error; + else + goto normal_char; + } + if (CURRENT_LEVEL_START == pattern_offset) + break; /* ignore empty patterns for + and * */ + store_jump: + ALLOC(9); + INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, + pattern_offset + 6); + INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START); + if (op == Rplus) /* jump over initial failure_jump */ + INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump, + CURRENT_LEVEL_START + 6); + break; } - if (CURRENT_LEVEL_START == pattern_offset) - break; /* ignore empty patterns for + and * */ - ALLOC(9); - INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, - pattern_offset + 6); - INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START); - if (op == Rplus) /* jump over initial failure_jump */ - INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump, - CURRENT_LEVEL_START + 6); - break; case Ror: - ALLOC(6); - INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, - pattern_offset + 6); - if (num_jumps >= MAX_NESTING) - goto too_complex; - STORE(Cjump); - future_jumps[num_jumps++] = pattern_offset; - STORE(0); - STORE(0); - SET_LEVEL_START; - break; + { + ALLOC(6); + INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, + pattern_offset + 6); + if (num_jumps >= MAX_NESTING) + goto too_complex; + STORE(Cjump); + future_jumps[num_jumps++] = pattern_offset; + STORE(0); + STORE(0); + SET_LEVEL_START; + break; + } case Ropenpar: - SET_LEVEL_START; - if (next_register < RE_NREGS) - { - bufp->uses_registers = 1; - ALLOC(2); - STORE(Cstart_memory); - STORE(next_register); - open_registers[num_open_registers++] = next_register; - next_register++; - } - paren_depth++; - PUSH_LEVEL_STARTS; - current_level = 0; - SET_LEVEL_START; - break; + { + SET_LEVEL_START; + if (next_register < RE_NREGS) + { + bufp->uses_registers = 1; + ALLOC(2); + STORE(Cstart_memory); + STORE(next_register); + open_registers[num_open_registers++] = next_register; + bufp->num_registers++; + next_register++; + } + paren_depth++; + PUSH_LEVEL_STARTS; + current_level = 0; + SET_LEVEL_START; + break; + } case Rclosepar: - if (paren_depth <= 0) - goto parenthesis_error; - POP_LEVEL_STARTS; - current_level = regexp_precedences[Ropenpar]; - paren_depth--; - if (paren_depth < num_open_registers) - { - bufp->uses_registers = 1; - ALLOC(2); - STORE(Cend_memory); - num_open_registers--; - STORE(open_registers[num_open_registers]); - } - break; + { + if (paren_depth <= 0) + goto parenthesis_error; + POP_LEVEL_STARTS; + current_level = silc_regexp_precedencess[Ropenpar]; + paren_depth--; + if (paren_depth < num_open_registers) + { + bufp->uses_registers = 1; + ALLOC(2); + STORE(Cend_memory); + num_open_registers--; + STORE(open_registers[num_open_registers]); + } + break; + } case Rmemory: - if (ch == '0') - goto bad_match_register; - assert(ch >= '0' && ch <= '9'); - bufp->uses_registers = 1; - opcode = Cmatch_memory; - ch -= '0'; - goto store_opcode_and_arg; + { + if (ch == '0') + goto bad_match_register; + SILC_ASSERT(ch >= '0' && ch <= '9'); + bufp->uses_registers = 1; + opcode = Cmatch_memory; + ch -= '0'; + goto store_opcode_and_arg; + } case Rextended_memory: - NEXTCHAR(ch); - if (ch < '0' || ch > '9') - goto bad_match_register; - NEXTCHAR(a); - if (a < '0' || a > '9') - goto bad_match_register; - ch = 10 * (a - '0') + ch - '0'; - if (ch <= 0 || ch >= RE_NREGS) - goto bad_match_register; - bufp->uses_registers = 1; - opcode = Cmatch_memory; - goto store_opcode_and_arg; + { + NEXTCHAR(ch); + if (ch < '0' || ch > '9') + goto bad_match_register; + NEXTCHAR(a); + if (a < '0' || a > '9') + goto bad_match_register; + ch = 10 * (a - '0') + ch - '0'; + if (ch == 0 || ch >= RE_NREGS) + goto bad_match_register; + bufp->uses_registers = 1; + opcode = Cmatch_memory; + goto store_opcode_and_arg; + } case Ropenset: { - int complement,prev,offset,range,firstchar; + int complement; + int prev; + int offset; + int range; + int firstchar; SET_LEVEL_START; ALLOC(1+256/8); @@ -698,14 +1480,14 @@ SilcRegex bufp; while (ch != '\135' || firstchar) { firstchar = 0; - if (regexp_ansi_sequences && ch == '\134') + if (silc_regexp_ansi_sequences && ch == '\134') { NEXTCHAR(ch); ANSI_TRANSLATE(ch); } if (range) { - for (a = prev; a <= ch; a++) + for (a = prev; a <= (int)ch; a++) SETBIT(pattern, offset, a); prev = -1; range = 0; @@ -731,62 +1513,209 @@ SilcRegex bufp; } break; } + case Ropenrep: + { + /* The bounded repeat syntax: a{n}, a{n,} and a{n,m}. The first + is compiled as n-1 Rnormals. The second is compiled as n-1 + Rnormals and one Rplus. The third is compiled as n-1 Rnormals + and m-n Rnormals with Roptionals. 0 values have special + compilation. */ + int min, max, i, alen = 2; + + if (pos >= size) + goto normal_char; /* Consider literal */ + + /* Get the preceding atom */ + if (pos < 2) + goto normal_char; /* Consider literal */ + pos -= 2; + NEXTCHAR(a); + if (translate) + a = translate[(unsigned char)a]; + op = silc_regexp_plain_ops[(unsigned char)a]; + + if (op == Ranychar) { + opcode = Canychar; + a = 0; + alen = 1; + } + + NEXTCHAR(ch); + + /* Get min value */ + NEXTCHAR(ch); + if (!isdigit(ch)) + goto normal_char; /* Consider literal */ + min = ch - '0'; + NEXTCHAR(ch); + while (isdigit(ch)) { + min *= 10; + min += ch - '0'; + NEXTCHAR(ch); + } + + if (min > 255) + goto repeat_value_error; + + if (ch == '}') { + /* The a{n} case */ + + if (!min) { + /* Will not do any matching with a{0} */ + pattern_offset -= 2; + break; + } + + /* Store min - 1 many Cexacts. */ + for (i = 0; i < min - 1; i++) { + SET_LEVEL_START; + ALLOC(alen); + STORE(opcode); + if (a) + STORE((unsigned char)a); + } + break; + } + + if (ch == ',') { + NEXTCHAR(ch); + + if (ch == '}') { + /* The a{n,} case */ + + if (!min) { + /* Store Rstar with a{0,} */ + op = Rstar; + goto store_jump; + } + + /* Store min - 1 many Cexacts. */ + for (i = 0; i < min - 1; i++) { + SET_LEVEL_START; + ALLOC(alen); + STORE(opcode); + if (a) + STORE((unsigned char)a); + } + + /* Store Rplus */ + op = Rplus; + goto store_jump; + } + + /* The a{n,m} case */ + + /* Get max value */ + if (!isdigit(ch)) + goto repeat_value_error; + max = ch - '0'; + NEXTCHAR(ch); + while (isdigit(ch)) { + max *= 10; + max += ch - '0'; + NEXTCHAR(ch); + } + + if (max > 255) + goto repeat_value_error; + if (min > max) + goto repeat_value_error; + + if (!min && !max) { + /* Will not do any matching with a{0,0} */ + pattern_offset -= 2; + break; + } + + if (ch != '}') + goto op_error; + + if (!min) + /* Only optional matching with a{0,m}. */ + pattern_offset -= 2; + + /* Store min - 1 many Cexacts. */ + for (i = 0; min && i < min - 1; i++) { + SET_LEVEL_START; + ALLOC(alen); + STORE(opcode); + if (a) + STORE((unsigned char)a); + } + + /* Store max - min Cexacts and Roptionals. */ + for (i = 0; i < max - min; i++) { + SET_LEVEL_START; + ALLOC(alen); + STORE(opcode); + if (a) + STORE((unsigned char)a); + ALLOC(3); + INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, + pattern_offset + 3); + } + break; + } + + goto op_error; + } case Rbegbuf: - opcode = Cbegbuf; - goto store_opcode; + { + opcode = Cbegbuf; + goto store_opcode; + } case Rendbuf: - opcode = Cendbuf; - goto store_opcode; + { + opcode = Cendbuf; + goto store_opcode; + } case Rwordchar: - opcode = Csyntaxspec; - ch = Sword; - goto store_opcode_and_arg; + { + opcode = Csyntaxspec; + ch = Sword; + goto store_opcode_and_arg; + } case Rnotwordchar: - opcode = Cnotsyntaxspec; - ch = Sword; - goto store_opcode_and_arg; + { + opcode = Cnotsyntaxspec; + ch = Sword; + goto store_opcode_and_arg; + } case Rwordbeg: - opcode = Cwordbeg; - goto store_opcode; + { + opcode = Cwordbeg; + goto store_opcode; + } case Rwordend: - opcode = Cwordend; - goto store_opcode; + { + opcode = Cwordend; + goto store_opcode; + } case Rwordbound: - opcode = Cwordbound; - goto store_opcode; + { + opcode = Cwordbound; + goto store_opcode; + } case Rnotwordbound: - opcode = Cnotwordbound; - goto store_opcode; -#ifdef emacs - case Remacs_at_dot: - opcode = Cemacs_at_dot; - goto store_opcode; - case Remacs_syntaxspec: - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - opcode = Csyntaxspec; - ch = syntax_spec_code[(unsigned char)ch]; - goto store_opcode_and_arg; - case Remacs_notsyntaxspec: - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - opcode = Cnotsyntaxspec; - ch = syntax_spec_code[(unsigned char)ch]; - goto store_opcode_and_arg; -#endif /* emacs */ + { + opcode = Cnotwordbound; + goto store_opcode; + } default: - abort(); + { + abort(); + } } beginning_context = (op == Ropenpar || op == Ror); } if (starts_base != 0) goto parenthesis_error; - assert(num_jumps == 0); + SILC_ASSERT(num_jumps == 0); ALLOC(1); STORE(Cend); SET_FIELDS; + if (!silc_re_optimize(bufp)) + return SILC_ERR; return SILC_OK; op_error: @@ -816,7 +1745,12 @@ SilcRegex bufp; too_complex: SET_FIELDS; return SILC_ERR_REGEX_TOO_COMPLEX; + + repeat_value_error: + SET_FIELDS; + return SILC_ERR_REGEX_REPEAT; } + #undef CHARAT #undef NEXTCHAR #undef GETHEX @@ -831,642 +1765,464 @@ SilcRegex bufp; #undef SETBIT #undef SET_FIELDS -static void re_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap) -char *code, *visited, *can_be_null, *fastmap; -int pos; -{ - int a, b, syntaxcode; +#define PREFETCH if (text == textend) goto fail - if (visited[pos]) - return; /* we have already been here */ - visited[pos] = 1; - for (;;) - switch (code[pos++]) - { - case Cend: - *can_be_null = 1; - return; - case Cbol: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: -#ifdef emacs - case Cemacs_at_dot: -#endif /* emacs */ - break; - case Csyntaxspec: - syntaxcode = code[pos++]; - for (a = 0; a < 256; a++) - if (SYNTAX(a) == syntaxcode) - fastmap[a] = 1; - return; - case Cnotsyntaxspec: - syntaxcode = code[pos++]; - for (a = 0; a < 256; a++) - if (SYNTAX(a) != syntaxcode) - fastmap[a] = 1; - return; - case Ceol: - fastmap['\n'] = 1; - if (*can_be_null == 0) - *can_be_null = 2; /* can match null, but only at end of buffer*/ - return; - case Cset: - for (a = 0; a < 256/8; a++) - if (code[pos + a] != 0) - for (b = 0; b < 8; b++) - if (code[pos + a] & (1 << b)) - fastmap[(a << 3) + b] = 1; - pos += 256/8; - return; - case Cexact: - fastmap[(unsigned char)code[pos]] = 1; - return; - case Canychar: - for (a = 0; a < 256; a++) - if (a != '\n') - fastmap[a] = 1; - return; - case Cstart_memory: - case Cend_memory: - pos++; - break; - case Cmatch_memory: - for (a = 0; a < 256; a++) - fastmap[a] = 1; - *can_be_null = 1; - return; - case Cjump: - case Cdummy_failure_jump: - case Cupdate_failure_jump: - case Cstar_jump: - a = (unsigned char)code[pos++]; - a |= (unsigned char)code[pos++] << 8; - pos += (int)(short)a; - if (visited[pos]) - { - /* argh... the regexp contains empty loops. This is not - good, as this may cause a failure stack overflow when - matching. Oh well. */ - /* this path leads nowhere; pursue other paths. */ - return; - } - visited[pos] = 1; - break; - case Cfailure_jump: - a = (unsigned char)code[pos++]; - a |= (unsigned char)code[pos++] << 8; - a = pos + (int)(short)a; - re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap); - break; - default: - abort(); /* probably some opcode is missing from this switch */ - /*NOTREACHED*/ - } -} +#define NEXTCHAR(var) \ + PREFETCH; \ + var = (unsigned char)*text++; \ + if (translate) \ + var = translate[var] -static int re_do_compile_fastmap(buffer, used, pos, can_be_null, fastmap) -char *buffer, *fastmap, *can_be_null; -int used, pos; +int silc_re_match(SilcRegex bufp, unsigned char *string, int size, int pos, + regexp_registers_t old_regs, unsigned int flags) { - char small_visited[512], *visited; - - if (used <= sizeof(small_visited)) - visited = small_visited; - else - { - visited = silc_malloc(used); - if (!visited) - return 0; - } - *can_be_null = 0; - memset(fastmap, 0, 256); - memset(visited, 0, used); - re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap); - if (visited != small_visited) - silc_free(visited); - return 1; -} + unsigned char *code; + unsigned char *translate; + unsigned char *text; + unsigned char *textstart; + unsigned char *textend; + int a; + int b; + int ch; + int reg; + int match_end; + unsigned char *regstart; + unsigned char *regend; + int regsize; + match_state state; + + SILC_ASSERT(pos >= 0 && size >= 0); + SILC_ASSERT(pos <= size); + + text = string + pos; + textstart = string; + textend = string + size; -void re_compile_fastmap(bufp) -SilcRegex bufp; -{ - if (!bufp->fastmap || bufp->fastmap_accurate) - return; - assert(bufp->used > 0); - if (!re_do_compile_fastmap(bufp->buffer, bufp->used, 0, &bufp->can_be_null, - bufp->fastmap)) - return; - if (bufp->buffer[0] == Cbol) - bufp->anchor = 1; /* begline */ - else - if (bufp->buffer[0] == Cbegbuf) - bufp->anchor = 2; /* begbuf */ - else - bufp->anchor = 0; /* none */ - bufp->fastmap_accurate = 1; -} + code = bufp->buffer; -#define INITIAL_FAILURES 128 /* initial # failure points to allocate */ -#define MAX_FAILURES 4100 /* max # of failure points before failing */ + translate = bufp->translate; -int re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop) -SilcRegex bufp; -char *string1, *string2; -int size1, size2, pos, mstop; -regexp_registers_t regs; -{ - struct failure_point { char *text, *partend, *code; } - *failure_stack_start, *failure_sp, *failure_stack_end, - initial_failure_stack[INITIAL_FAILURES]; - char *code, *translate, *text, *textend, *partend, *part_2_end; - char *regstart_text[RE_NREGS], *regstart_partend[RE_NREGS]; - char *regend_text[RE_NREGS], *regend_partend[RE_NREGS]; - int a, b, ch, reg, regch, match_end; - char *regtext, *regpartend, *regtextend; - -#define PREFETCH \ - MACRO_BEGIN \ - if (text == partend) \ - { \ - if (text == textend) \ - goto fail; \ - text = string2; \ - partend = part_2_end; \ - } \ - MACRO_END + NEW_STATE(state, bufp->num_registers); -#define NEXTCHAR(var) \ - MACRO_BEGIN \ - PREFETCH; \ - (var) = (unsigned char)*text++; \ - if (translate) \ - (var) = (unsigned char)translate[(var)]; \ - MACRO_END - - assert(pos >= 0 && size1 >= 0 && size2 >= 0 && mstop >= 0); - assert(mstop <= size1 + size2); - assert(pos <= mstop); - - if (pos <= size1) + continue_matching: + switch (*code++) { - text = string1 + pos; - if (mstop <= size1) - { - partend = string1 + mstop; - textend = partend; + case Cend: + { + match_end = text - textstart; + if (old_regs) + { + old_regs->start[0] = pos; + old_regs->end[0] = match_end; + if (!bufp->uses_registers) + { + for (a = 1; a < RE_NREGS; a++) + { + old_regs->start[a] = -1; + old_regs->end[a] = -1; + } + } + else + { + for (a = 1; a < bufp->num_registers; a++) + { + if ((GET_REG_START(state, a) == NULL) || + (GET_REG_END(state, a) == NULL)) + { + old_regs->start[a] = -1; + old_regs->end[a] = -1; + continue; + } + old_regs->start[a] = GET_REG_START(state, a) - textstart; + old_regs->end[a] = GET_REG_END(state, a) - textstart; + } + for (; a < RE_NREGS; a++) + { + old_regs->start[a] = -1; + old_regs->end[a] = -1; + } + } + } + FREE_STATE(state); + return match_end - pos; + } + case Cbol: + { + if (text == textstart || text[-1] == '\n') { + if (flags & RE_NOTBOL) + goto fail; + goto continue_matching; } - else - { - partend = string1 + size1; - textend = string2 + mstop - size1; + goto fail; + } + case Ceol: + { + if (text == textend || *text == '\n') { + if (flags & RE_NOTEOL) + goto fail; + goto continue_matching; } - part_2_end = string2 + mstop - size1; - } - else - { - text = string2 + pos - size1; - partend = string2 + mstop - size1; - textend = partend; - part_2_end = partend; - } - - if (bufp->uses_registers && regs != NULL) - for (a = 0; a < RE_NREGS; a++) - regend_text[a] = NULL; - - code = bufp->buffer; - translate = bufp->translate; - failure_stack_start = failure_sp = initial_failure_stack; - failure_stack_end = initial_failure_stack + INITIAL_FAILURES; - -#if 0 - /* re_search_2 has already done this, and otherwise we get little benefit - from this. So I'll leave this out. */ - if (bufp->fastmap_accurate && !bufp->can_be_null && - text != textend && - !bufp->fastmap[translate ? - (unsigned char)translate[(unsigned char)*text] : - (unsigned char)*text]) - return -1; /* it can't possibly match */ -#endif - - continue_matching: - for (;;) - { - switch (*code++) - { - case Cend: - if (partend != part_2_end) - match_end = text - string1; - else - match_end = text - string2 + size1; - if (regs) + goto fail; + } + case Cset: + { + NEXTCHAR(ch); + if (code[ch/8] & (1<<(ch & 7))) + { + code += 256/8; + goto continue_matching; + } + goto fail; + } + case Cexact: + { + NEXTCHAR(ch); + if (ch != (unsigned char)*code++) + goto fail; + goto continue_matching; + } + case Canychar: + { + NEXTCHAR(ch); + if (ch == '\n') + goto fail; + goto continue_matching; + } + case Cstart_memory: + { + reg = *code++; + SET_REG_START(state, reg, text, goto error); + goto continue_matching; + } + case Cend_memory: + { + reg = *code++; + SET_REG_END(state, reg, text, goto error); + goto continue_matching; + } + case Cmatch_memory: + { + reg = *code++; + regstart = GET_REG_START(state, reg); + regend = GET_REG_END(state, reg); + if ((regstart == NULL) || (regend == NULL)) + goto fail; /* or should we just match nothing? */ + regsize = regend - regstart; + + if (regsize > (textend - text)) + goto fail; + if(translate) + { + for (; regstart < regend; regstart++, text++) + if (translate[*regstart] != translate[*text]) + goto fail; + } + else + for (; regstart < regend; regstart++, text++) + if (*regstart != *text) + goto fail; + goto continue_matching; + } + case Cupdate_failure_jump: + { + UPDATE_FAILURE(state, text, goto error); + /* fall to next case */ + } + /* treat Cstar_jump just like Cjump if it hasn't been optimized */ + case Cstar_jump: + case Cjump: + { + a = (unsigned char)*code++; + a |= (unsigned char)*code++ << 8; + code += (int)SHORT(a); + if (codebuffer || bufp->buffer+bufp->usedbuffer || bufp->buffer+bufp->used < failuredest) { + silc_set_errno(SILC_ERR_OVERFLOW); + FREE_STATE(state); + return -2; + } + PUSH_FAILURE(state, failuredest, NULL, goto error); + code += a; + if (codebuffer || bufp->buffer+bufp->used < code) { + silc_set_errno(SILC_ERR_OVERFLOW); + FREE_STATE(state); + return -2; + } + goto continue_matching; + } + case Cfailure_jump: + { + a = (unsigned char)*code++; + a |= (unsigned char)*code++ << 8; + a = (int)SHORT(a); + if (code+abuffer || bufp->buffer+bufp->used < code+a) { + silc_set_errno(SILC_ERR_OVERFLOW); + FREE_STATE(state); + return -2; + } + PUSH_FAILURE(state, code + a, text, goto error); + goto continue_matching; + } + case Crepeat1: + { + unsigned char *pinst; + a = (unsigned char)*code++; + a |= (unsigned char)*code++ << 8; + a = (int)SHORT(a); + pinst = code + a; + if (pinstbuffer || bufp->buffer+bufp->usedstart[0] = pos; - regs->end[0] = match_end; - if (!bufp->uses_registers) + if (translate) { - for (a = 1; a < RE_NREGS; a++) + while (text < textend) { - regs->start[a] = -1; - regs->end[a] = -1; + ch = translate[(unsigned char)*text]; + if (pinst[ch/8] & (1<<(ch & 7))) + text++; + else + break; } } else { - for (a = 1; a < RE_NREGS; a++) + while (text < textend) { - if (regend_text[a] == NULL) - { - regs->start[a] = -1; - regs->end[a] = -1; - continue; - } - if (regstart_partend[a] != part_2_end) - regs->start[a] = regstart_text[a] - string1; - else - regs->start[a] = regstart_text[a] - string2 + size1; - if (regend_partend[a] != part_2_end) - regs->end[a] = regend_text[a] - string1; + ch = (unsigned char)*text; + if (pinst[ch/8] & (1<<(ch & 7))) + text++; else - regs->end[a] = regend_text[a] - string2 + size1; + break; } } + break; } - if (failure_stack_start != initial_failure_stack) - silc_free((char *)failure_stack_start); - return match_end - pos; - case Cbol: - if (text == string1 || text[-1] == '\n') /* text[-1] always valid */ - break; - goto fail; - case Ceol: - if (text == string2 + size2 || - (text == string1 + size1 ? - (size2 == 0 || *string2 == '\n') : - *text == '\n')) - break; - goto fail; - case Cset: - NEXTCHAR(ch); - if (code[ch/8] & (1<<(ch & 7))) + case Cexact: { - code += 256/8; + ch = (unsigned char)*pinst; + if (translate) + { + while (text < textend && + translate[(unsigned char)*text] == ch) + text++; + } + else + { + while (text < textend && (unsigned char)*text == ch) + text++; + } break; } - goto fail; - case Cexact: - NEXTCHAR(ch); - if (ch != (unsigned char)*code++) - goto fail; - break; - case Canychar: - NEXTCHAR(ch); - if (ch == '\n') - goto fail; - break; - case Cstart_memory: - reg = *code++; - regstart_text[reg] = text; - regstart_partend[reg] = partend; - break; - case Cend_memory: - reg = *code++; - regend_text[reg] = text; - regend_partend[reg] = partend; - break; - case Cmatch_memory: - reg = *code++; - if (regend_text[reg] == NULL) - goto fail; /* or should we just match nothing? */ - regtext = regstart_text[reg]; - regtextend = regend_text[reg]; - if (regstart_partend[reg] == regend_partend[reg]) - regpartend = regtextend; - else - regpartend = string1 + size1; - - for (;regtext != regtextend;) + case Canychar: { - NEXTCHAR(ch); - if (regtext == regpartend) - regtext = string2; - regch = (unsigned char)*regtext++; - if (translate) - regch = (unsigned char)translate[regch]; - if (regch != ch) - goto fail; + while (text < textend && (unsigned char)*text != '\n') + text++; + break; } - break; - case Cstar_jump: - /* star is coded as: - 1: failure_jump 2 - ... code for operand of star - star_jump 1 - 2: ... code after star - We change the star_jump to update_failure_jump if we can determine - that it is safe to do so; otherwise we change it to an ordinary - jump. - plus is coded as - jump 2 - 1: failure_jump 3 - 2: ... code for operand of plus - star_jump 1 - 3: ... code after plus - For star_jump considerations this is processed identically - to star. */ - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)(short)a; - { - char map[256], can_be_null; - char *p1, *p2; - - p1 = code + a + 3; /* skip the failure_jump */ - assert(p1[-3] == Cfailure_jump); - p2 = code; - /* p1 points inside loop, p2 points to after loop */ - if (!re_do_compile_fastmap(bufp->buffer, bufp->used, - p2 - bufp->buffer, &can_be_null, map)) - goto make_normal_jump; - /* If we might introduce a new update point inside the loop, - we can't optimize because then update_jump would update a - wrong failure point. Thus we have to be quite careful here. */ - loop_p1: - /* loop until we find something that consumes a character */ - switch (*p1++) - { - case Cbol: - case Ceol: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: -#ifdef emacs - case Cemacs_at_dot: -#endif /* emacs */ - goto loop_p1; - case Cstart_memory: - case Cend_memory: - p1++; - goto loop_p1; - case Cexact: - ch = (unsigned char)*p1++; - if (map[ch]) - goto make_normal_jump; - break; - case Canychar: - for (b = 0; b < 256; b++) - if (b != '\n' && map[b]) - goto make_normal_jump; - break; - case Cset: - for (b = 0; b < 256; b++) - if ((p1[b >> 3] & (1 << (b & 7))) && map[b]) - goto make_normal_jump; - p1 += 256/8; - break; - default: - goto make_normal_jump; - } - /* now we know that we can't backtrack. */ - while (p1 != p2 - 3) - { - switch (*p1++) - { - case Cend: - abort(); /* we certainly shouldn't get this inside loop */ - /*NOTREACHED*/ - case Cbol: - case Ceol: - case Canychar: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: -#ifdef emacs - case Cemacs_at_dot: -#endif /* emacs */ - break; - case Cset: - p1 += 256/8; - break; - case Cexact: - case Cstart_memory: - case Cend_memory: - case Cmatch_memory: - case Csyntaxspec: - case Cnotsyntaxspec: - p1++; - break; - case Cjump: - case Cstar_jump: - case Cfailure_jump: - case Cupdate_failure_jump: - case Cdummy_failure_jump: - goto make_normal_jump; - default: - printf("regexpr.c: processing star_jump: unknown op %d\n", p1[-1]); - break; - } - } - goto make_update_jump; - } - make_normal_jump: - /* printf("changing to normal jump\n"); */ - code -= 3; - *code = Cjump; - break; - make_update_jump: - /* printf("changing to update jump\n"); */ - code -= 2; - a += 3; /* jump to after the Cfailure_jump */ - code[-1] = Cupdate_failure_jump; - code[0] = a & 0xff; - code[1] = a >> 8; - /* fall to next case */ - case Cupdate_failure_jump: - failure_sp[-1].text = text; - failure_sp[-1].partend = partend; - /* fall to next case */ - case Cjump: - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - code += (int)(short)a; - break; - case Cdummy_failure_jump: - case Cfailure_jump: - if (failure_sp == failure_stack_end) + case Csyntaxspec: { - if (failure_stack_start != initial_failure_stack) - goto error; - failure_stack_start = (struct failure_point *) - silc_malloc(MAX_FAILURES * sizeof(*failure_stack_start)); - failure_stack_end = failure_stack_start + MAX_FAILURES; - memcpy((char *)failure_stack_start, (char *)initial_failure_stack, - INITIAL_FAILURES * sizeof(*failure_stack_start)); - failure_sp = failure_stack_start + INITIAL_FAILURES; + a = (unsigned char)*pinst; + if (translate) + { + while (text < textend && + (SYNTAX(translate[*text]) & a) ) + text++; + } + else + { + while (text < textend && (SYNTAX(*text) & a) ) + text++; + } + break; } - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)(short)a; - if (code[-3] == Cdummy_failure_jump) - { /* this is only used in plus */ - assert(*code == Cfailure_jump); - b = (unsigned char)code[1]; - b |= (unsigned char)code[2] << 8; - failure_sp->code = code + (int)(short)b + 3; - failure_sp->text = NULL; - code += a; + case Cnotsyntaxspec: + { + a = (unsigned char)*pinst; + if (translate) + { + while (text < textend && + !(SYNTAX(translate[*text]) & a) ) + text++; + } + else + { + while (text < textend && !(SYNTAX(*text) & a) ) + text++; + } + break; } - else + default: { - failure_sp->code = code + a; - failure_sp->text = text; - failure_sp->partend = partend; + FREE_STATE(state); + silc_set_errno(SILC_ERR_REGEX_OPCODE); + return -2; + /*NOTREACHED*/ } - failure_sp++; - break; - case Cbegbuf: - if (text == string1) - break; + } + /* due to the funky way + and * are compiled, the top + * failure- stack entry at this point is actually a + * success entry -- update it & pop it */ + UPDATE_FAILURE(state, text, goto error); + goto fail; /* i.e., succeed */ + } + case Cbegbuf: + { + if (text == textstart) + goto continue_matching; + goto fail; + } + case Cendbuf: + { + if (text == textend) + goto continue_matching; + goto fail; + } + case Cwordbeg: + { + if (text == textend) goto fail; - case Cendbuf: - if (size2 == 0 ? text == string1 + size1 : text == string2 + size2) - break; + if (!(SYNTAX(*text) & Sword)) goto fail; - case Cwordbeg: - if (text == string2 + size2) - goto fail; - if (size2 == 0 && text == string1 + size1) - goto fail; - if (SYNTAX(text == string1 + size1 ? *string1 : *text) != Sword) - goto fail; - if (text == string1) - break; - if (SYNTAX(text[-1]) != Sword) - break; + if (text == textstart) + goto continue_matching; + if (!(SYNTAX(text[-1]) & Sword)) + goto continue_matching; + goto fail; + } + case Cwordend: + { + if (text == textstart) goto fail; - case Cwordend: - if (text == string1) - goto fail; - if (SYNTAX(text[-1]) != Sword) - goto fail; - if (text == string2 + size2) - break; - if (size2 == 0 && text == string1 + size1) - break; - if (SYNTAX(*text) == Sword) - goto fail; - break; - case Cwordbound: - /* Note: as in gnu regexp, this also matches at the beginning - and end of buffer. */ - if (text == string1 || text == string2 + size2 || - (size2 == 0 && text == string1 + size1)) - break; - if ((SYNTAX(text[-1]) == Sword) ^ - (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword)) - break; + if (!(SYNTAX(text[-1]) & Sword)) goto fail; - case Cnotwordbound: - /* Note: as in gnu regexp, this never matches at the beginning - and end of buffer. */ - if (text == string1 || text == string2 + size2 || - (size2 == 0 && text == string1 + size1)) - goto fail; - if (!((SYNTAX(text[-1]) == Sword) ^ - (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword))) - goto fail; - break; - case Csyntaxspec: - NEXTCHAR(ch); - if (SYNTAX(ch) != (unsigned char)*code++) - goto fail; - break; - case Cnotsyntaxspec: - NEXTCHAR(ch); - if (SYNTAX(ch) != (unsigned char)*code++) - break; + if (text == textend) + goto continue_matching; + if (!(SYNTAX(*text) & Sword)) + goto continue_matching; + goto fail; + } + case Cwordbound: + { + /* Note: as in gnu regexp, this also matches at the + * beginning and end of buffer. */ + + if (text == textstart || text == textend) + goto continue_matching; + if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)) + goto continue_matching; + goto fail; + } + case Cnotwordbound: + { + /* Note: as in gnu regexp, this never matches at the + * beginning and end of buffer. */ + if (text == textstart || text == textend) goto fail; -#ifdef emacs - case Cemacs_at_dot: - if (PTR_CHAR_POS((unsigned char *)text) + 1 != point) - goto fail; - break; -#endif /* emacs */ - default: - abort(); - /*NOTREACHED*/ - } + if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))) + goto continue_matching; + goto fail; + } + case Csyntaxspec: + { + NEXTCHAR(ch); + if (!(SYNTAX(ch) & (unsigned char)*code++)) + goto fail; + goto continue_matching; + } + case Cnotsyntaxspec: + { + NEXTCHAR(ch); + if (SYNTAX(ch) & (unsigned char)*code++) + goto fail; + goto continue_matching; + } + default: + { + FREE_STATE(state); + silc_set_errno(SILC_ERR_REGEX_OPCODE); + return -2; + /*NOTREACHED*/ + } } - abort(); - /*NOTREACHED*/ + /* Using "break;" in the above switch statement is equivalent to + "goto fail;" */ fail: - if (failure_sp != failure_stack_start) - { - failure_sp--; - text = failure_sp->text; - if (text == NULL) - goto fail; - partend = failure_sp->partend; - code = failure_sp->code; - goto continue_matching; - } - if (failure_stack_start != initial_failure_stack) - silc_free((char *)failure_stack_start); + POP_FAILURE(state, code, text, goto done_matching, goto error); + goto continue_matching; + + done_matching: + /* if(translated != NULL) */ + /* free(translated); */ + FREE_STATE(state); return -1; error: - if (failure_stack_start != initial_failure_stack) - silc_free((char *)failure_stack_start); + /* if (translated != NULL) */ + /* free(translated); */ + FREE_STATE(state); return -2; } #undef PREFETCH #undef NEXTCHAR -#undef PUSH_FAILURE - -int re_match(bufp, string, size, pos, regs) -SilcRegex bufp; -char *string; -int size, pos; -regexp_registers_t regs; -{ - return re_match_2(bufp, string, size, (char *)NULL, 0, pos, regs, size); -} -int re_search_2(bufp, string1, size1, string2, size2, pos, range, regs, - mstop) -SilcRegex bufp; -char *string1, *string2; -int size1, size2, pos, range, mstop; -regexp_registers_t regs; +int silc_re_search(SilcRegex bufp, unsigned char *string, int size, int pos, + int range, regexp_registers_t regs, unsigned int flags) { - char *fastmap, *translate, *text, *partstart, *partend; - int dir, ret; - char anchor; + unsigned char *fastmap; + unsigned char *translate; + unsigned char *text; + unsigned char *partstart; + unsigned char *partend; + int dir; + int ret; + unsigned char anchor; - assert(size1 >= 0 && size2 >= 0 && pos >= 0 && mstop >= 0); - assert(pos + range >= 0 && pos + range <= size1 + size2); - assert(pos <= mstop); + SILC_ASSERT(size >= 0 && pos >= 0); + SILC_ASSERT(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */ fastmap = bufp->fastmap; translate = bufp->translate; - if (fastmap && !bufp->fastmap_accurate) - re_compile_fastmap(bufp); + if (fastmap && !bufp->fastmap_accurate) { + if (silc_re_compile_fastmap(bufp)) + return -2; + } + anchor = bufp->anchor; if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */ fastmap = NULL; + if (range < 0) { dir = -1; @@ -1474,60 +2230,40 @@ regexp_registers_t regs; } else dir = 1; + if (anchor == 2) { if (pos != 0) return -1; else range = 0; } + for (; range >= 0; range--, pos += dir) { if (fastmap) { if (dir == 1) { /* searching forwards */ - if (pos < size1) - { - text = string1 + pos; - if (pos + range > size1) - partend = string1 + size1; - else - partend = string1 + pos + range; - } - else - { - text = string2 + pos - size1; - partend = string2 + pos + range - size1; - } + + text = string + pos; + partend = string + size; partstart = text; if (translate) while (text != partend && - !fastmap[(unsigned char) - translate[(unsigned char)*text]]) + !fastmap[(unsigned char) translate[(unsigned char)*text]]) text++; else while (text != partend && !fastmap[(unsigned char)*text]) text++; pos += text - partstart; range -= text - partstart; - if (pos == size1 + size2 && bufp->can_be_null == 0) + if (pos == size && bufp->can_be_null == 0) return -1; } else { /* searching backwards */ - if (pos <= size1) - { - text = string1 + pos; - partstart = string1 + pos - range; - } - else - { - text = string2 + pos - size1; - if (range < pos - size1) - partstart = string2 + pos - size1 - range; - else - partstart = string2; - } + text = string + pos; + partstart = string + pos - range; partend = text; if (translate) while (text != partstart && @@ -1544,13 +2280,11 @@ regexp_registers_t regs; } if (anchor == 1) { /* anchored to begline */ - if (pos > 0 && - (pos <= size1 ? string1[pos - 1] : - string2[pos - size1 - 1]) != '\n') + if (pos > 0 && (string[pos - 1] != '\n')) continue; } - assert(pos >= 0 && pos <= size1 + size2); - ret = re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop); + SILC_ASSERT(pos >= 0 && pos <= size); + ret = silc_re_match(bufp, string, size, pos, regs, flags); if (ret >= 0) return pos; if (ret == -2) @@ -1559,16 +2293,6 @@ regexp_registers_t regs; return -1; } -int re_search(bufp, string, size, startpos, range, regs) -SilcRegex bufp; -char *string; -int size, startpos, range; -regexp_registers_t regs; -{ - return re_search_2(bufp, string, size, (char *)NULL, 0, - startpos, range, regs, size); -} - /****************************** SILC Regex API ******************************/ /* Compile regular expression */ @@ -1577,7 +2301,6 @@ SilcBool silc_regex_compile(SilcRegex regexp, const char *regex, SilcRegexFlags flags) { SilcResult ret; - int syntax = 0; if (!regexp || !regex) { silc_set_errno(SILC_ERR_INVALID_ARGUMENT); @@ -1586,26 +2309,34 @@ SilcBool silc_regex_compile(SilcRegex regexp, const char *regex, memset(regexp, 0, sizeof(*regexp)); - /* Set syntax */ - syntax |= (RE_CONTEXT_INDEP_OPS | RE_NO_BK_PARENS | RE_NO_BK_VBAR); - re_set_syntax(syntax); + /* Get global stack, if set, and create child stack. */ + regexp->rstack = silc_stack_get_global(); + if (regexp->rstack) + regexp->rstack = silc_stack_alloc(512, regexp->rstack); /* Compile */ - ret = re_compile_pattern((char *)regex, strlen(regex), regexp); + ret = silc_re_compile_pattern((unsigned char *)regex, strlen(regex), regexp); if (ret != SILC_OK) silc_set_errno(ret); + if (ret != SILC_OK) { + silc_regex_free(regexp); + regexp->rstack = NULL; + regexp->buffer = NULL; + } + return ret == SILC_OK; } /* Match compiled regular expression */ SilcBool silc_regex_match(SilcRegex regexp, const char *string, - SilcUInt32 num_match, SilcRegexMatch match, - SilcRegexFlags flags) + SilcUInt32 string_len, SilcUInt32 num_match, + SilcRegexMatch match, SilcRegexFlags flags) { struct re_registers regs; - int ret, i, len = strlen(string); + unsigned int f = 0; + int ret, i; if (!regexp || !string) { silc_set_errno(SILC_ERR_INVALID_ARGUMENT); @@ -1621,13 +2352,17 @@ SilcBool silc_regex_match(SilcRegex regexp, const char *string, if (num_match > RE_NREGS) num_match = RE_NREGS; + /* Set flags */ + if (flags & SILC_REGEX_NOTBOL) + f |= RE_NOTBOL; + if (flags & SILC_REGEX_NOTEOL) + f |= RE_NOTEOL; + /* Search */ - ret = re_search(regexp, (char *)string, len, 0, len, - num_match ? ®s : NULL); + ret = silc_re_search(regexp, (unsigned char *)string, string_len, 0, + string_len, num_match ? ®s : NULL, f); if (ret < 0) { - if (ret == -2) - silc_set_errno(SILC_ERR); - else + if (ret == -1) silc_set_errno(SILC_ERR_NOT_FOUND); } @@ -1646,5 +2381,351 @@ SilcBool silc_regex_match(SilcRegex regexp, const char *string, void silc_regex_free(SilcRegex regexp) { - silc_free(regexp->buffer); + silc_sfree(regexp->rstack, regexp->buffer); + silc_stack_free(regexp->rstack); +} + +/* Match string */ + +SilcBool silc_regex_va(const char *string, SilcUInt32 string_len, + const char *regex, SilcBuffer match, va_list va) +{ + SilcRegexStruct reg; + SilcRegexMatch m = NULL; + SilcBuffer buf, *rets = NULL; + SilcStack stack; + int i, c = 0; + + /* Compile */ + if (!silc_regex_compile(®, regex, 0)) + return FALSE; + + stack = reg.rstack; + silc_stack_push(stack, NULL); + + /* Get match pointers */ + if (match) { + rets = silc_smalloc(stack, sizeof(*rets)); + if (!rets) { + silc_stack_pop(stack); + silc_regex_free(®); + return FALSE; + } + silc_buffer_set(match, NULL, 0); + rets[c++] = match; + + while ((buf = va_arg(va, SilcBuffer))) { + rets = silc_srealloc(stack, c * sizeof(*rets), + rets, (c + 1) * sizeof(*rets)); + if (!rets) { + silc_stack_pop(stack); + silc_regex_free(®); + return FALSE; + } + silc_buffer_set(buf, NULL, 0); + rets[c++] = buf; + } + + m = silc_smalloc(stack, c * sizeof(*m)); + if (!m) { + silc_sfree(stack, rets); + silc_stack_pop(stack); + silc_regex_free(®); + return FALSE; + } + } + + /* Match */ + if (!silc_regex_match(®, string, string_len, c, m, 0)) { + silc_sfree(stack, m); + silc_sfree(stack, rets); + silc_stack_pop(stack); + silc_regex_free(®); + return FALSE; + } + + /* Return matches */ + for (i = 0; i < c; i++) { + if (m[i].start == -1) + continue; + silc_buffer_set(rets[i], (unsigned char *)string + m[i].start, + m[i].end - m[i].start); + } + + silc_sfree(stack, m); + silc_sfree(stack, rets); + silc_stack_pop(stack); + silc_regex_free(®); + + return TRUE; +} + +/* Match string */ + +SilcBool silc_regex(const char *string, const char *regex, + SilcBuffer match, ...) +{ + SilcBool ret; + va_list va; + + if (!string || !regex) { + silc_set_errno(SILC_ERR_INVALID_ARGUMENT); + return FALSE; + } + + if (match) + va_start(va, match); + + ret = silc_regex_va(string, strlen(string), regex, match, va); + + if (match) + va_end(va); + + return ret; +} + +/* Match string */ + +SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex, + SilcBuffer match, ...) +{ + SilcBool ret; + va_list va; + + if (!buffer || !regex) { + silc_set_errno(SILC_ERR_INVALID_ARGUMENT); + return FALSE; + } + + if (match) + va_start(va, match); + + ret = silc_regex_va((const char *)silc_buffer_data(buffer), + silc_buffer_len(buffer), regex, match, va); + + if (match) + va_end(va); + + return ret; +} + +/***************************** Substitution API *****************************/ + +/* Regexp to parse sed substitution command syntax */ +#define SILC_REGEXP_SUBST \ + "^(/?.+/?[^!s]|[0-9]+|\\$)?(!?s)(/)(.*[^\\])?(/)(.*[^\\])?(/)(!?.+)?" + +/* Substitution context */ +typedef struct { + SilcInt32 addr_number; /* Line number to match, -1 for last line */ + SilcUInt32 line; /* Current line number */ + char *str_regexp; /* REGEXP to match */ + SilcBufferRegexFlags match_flags; /* Match flags */ + SilcBufferRegexFlags addr_flags; /* ADDR flags */ + SilcBuffer rep; /* REPLACEMENT */ +} SilcSubstContext; + +/* Function to check the ADDR match and do rest of the match and + substitution. */ + +static int silc_subst_addr(SilcStack stack, SilcBuffer buffer, void *value, + void *context) +{ + SilcSubstContext *ctx = context; + + ctx->line++; + + /* If NUMBER was set in ADDR, match for specific line number */ + if (ctx->addr_number > 0 && ctx->addr_number != ctx->line && + !(ctx->addr_flags & SILC_STR_REGEX_NOT)) + return 0; + if (ctx->addr_number > 0 && ctx->addr_number == ctx->line && + ctx->addr_flags & SILC_STR_REGEX_NOT) + return 0; + + /* Check for last line if ADDR was '$' */ + if (buffer->tail != buffer->end && ctx->addr_number == -1 && + !(ctx->addr_flags & SILC_STR_REGEX_NOT)) + return 0; + if (buffer->tail == buffer->end && ctx->addr_number == -1 && + ctx->addr_flags & SILC_STR_REGEX_NOT) + return 0; + + /* Match and replace */ + return silc_buffer_format(buffer, + SILC_STR_REGEX(ctx->str_regexp, ctx->match_flags), + SILC_STR_REPLACE(silc_buffer_data(ctx->rep) ? + silc_buffer_data(ctx->rep) : + (unsigned char *)"", + silc_buffer_len(ctx->rep)), + SILC_STR_END, SILC_STR_END); +} + +/* Matching and substitution ala sed. */ + +SilcBool silc_subst(SilcBuffer buffer, const char *subst) +{ + SilcSubstContext ctx; + SilcBufferStruct match, addr, command, exp_start, exp, exp_end; + SilcBufferStruct rep, rep_end, flags; + SilcBufferRegexFlags addr_flags = 0, match_flags = 0; + char *str_addr = ""; + int ret = -1; + + memset(&ctx, 0, sizeof(ctx)); + + if (!buffer || !subst) { + silc_set_errno(SILC_ERR_INVALID_ARGUMENT); + goto out; + } + + SILC_LOG_DEBUG(("Substitution '%s'", subst)); + + /* Parse the expression syntax */ + if (!silc_regex(subst, SILC_REGEXP_SUBST, &match, &addr, &command, + &exp_start, &exp, &exp_end, &rep, &rep_end, &flags, NULL)) { + silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid substitution expression"); + goto out; + } + + /* Check address syntax */ + if (silc_buffer_len(&addr)) { + if (*silc_buffer_data(&addr) == '/') { + silc_buffer_pull(&addr, 1); + if (addr.tail[-1] != '/') { + silc_set_errno_reason(SILC_ERR_SYNTAX, + "Invalid address syntax, missing '/'"); + goto out; + } + silc_buffer_push_tail(&addr, 1); + + if (!silc_buffer_len(&addr)) { + silc_set_errno_reason(SILC_ERR_SYNTAX, + "Invalid address syntax, missing regular " + "expression"); + goto out; + } + str_addr = silc_memdup(silc_buffer_data(&addr), + silc_buffer_len(&addr)); + + } else if (*silc_buffer_data(&addr) == '$' && + silc_buffer_len(&addr) == 1) { + ctx.addr_number = -1; + + } else if (isdigit((int)*silc_buffer_data(&addr))) { + ctx.addr_number = *silc_buffer_data(&addr) - '0'; + silc_buffer_pull(&addr, 1); + while (silc_buffer_len(&addr) && + isdigit((int)*silc_buffer_data(&addr))) { + ctx.addr_number *= 10; + ctx.addr_number += *silc_buffer_data(&addr) - '0'; + silc_buffer_pull(&addr, 1); + } + + if (silc_buffer_len(&addr)) { + silc_set_errno_reason(SILC_ERR_SYNTAX, + "Invalid address syntax, not a number"); + goto out; + } + + if (ctx.addr_number == 0) { + silc_set_errno_reason(SILC_ERR_SYNTAX, + "Invalid address syntax, line address is 0"); + goto out; + } + + } else { + silc_set_errno_reason(SILC_ERR_SYNTAX, "Unsupported address syntax"); + goto out; + } + } + + /* Check command syntax */ + if (!silc_buffer_len(&command) || silc_buffer_len(&command) > 2) { + silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid commmand"); + goto out; + } + if ((silc_buffer_len(&command) == 1 && + !silc_buffer_memcmp(&command, "s", 1)) || + (silc_buffer_len(&command) == 2 && + !silc_buffer_memcmp(&command, "!s", 2))) { + silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid command"); + goto out; + } + if (silc_buffer_len(&command) == 2) + addr_flags |= SILC_STR_REGEX_NOT; + + /* Check REGEXP syntax */ + if (!silc_buffer_len(&exp_start) || + !silc_buffer_memcmp(&exp_start, "/", 1)) { + silc_set_errno_reason(SILC_ERR_SYNTAX, + "Invalid substitution syntax, missing '/'"); + goto out; + } + if (!silc_buffer_len(&exp_end) || + !silc_buffer_memcmp(&exp_end, "/", 1)) { + silc_set_errno_reason(SILC_ERR_SYNTAX, + "Invalid substitution syntax, missing '/'"); + goto out; + } + + /* Check FLAGS syntax */ + if (silc_buffer_len(&flags)) { + if (silc_buffer_len(&flags) > 1) { + silc_set_errno_reason(SILC_ERR_SYNTAX, "Invalid flags"); + goto out; + } + + /* Check supported flags */ + if (silc_buffer_len(&flags) == 1) { + if (silc_buffer_memcmp(&flags, "g", 1)) { + match_flags |= SILC_STR_REGEX_ALL; + } else { + silc_set_errno_reason(SILC_ERR_SYNTAX, "Unsupported flag"); + goto out; + } + } + } + + /* Set flags */ + match_flags |= SILC_STR_REGEX_INCLUSIVE; + addr_flags |= SILC_STR_REGEX_NL | SILC_STR_REGEX_NO_ADVANCE; + + ctx.str_regexp = silc_memdup(silc_buffer_data(&exp), + silc_buffer_len(&exp)); + ctx.addr_flags = addr_flags; + ctx.match_flags = match_flags; + + /* Unescape escapes from REPLACEMENT */ + ctx.rep = silc_buffer_copy(&rep); + if (!ctx.rep) + goto out; + if (silc_buffer_len(ctx.rep)) + silc_buffer_format(ctx.rep, + SILC_STR_REGEX("\\\\/", (SILC_STR_REGEX_ALL | + SILC_STR_REGEX_INCLUSIVE)), + SILC_STR_REPLACE("/", 1), + SILC_STR_END, SILC_STR_END); + + /* If NUMBER or $ is specified, handle NOT flag in the silc_subst_addr */ + if (ctx.addr_number) + addr_flags &= ~SILC_STR_REGEX_NOT; + + SILC_LOG_DEBUG(("ADDR '%s' flags 0x%x, NUMBER %d", str_addr, addr_flags, + ctx.addr_number)); + SILC_LOG_DEBUG(("REGEXP '%s' flags 0x%x", ctx.str_regexp, match_flags)); + + /* Match and replace */ + ret = silc_buffer_format(buffer, + SILC_STR_REGEX(str_addr, addr_flags), + SILC_STR_FUNC(silc_subst_addr, NULL, &ctx), + SILC_STR_END, SILC_STR_END); + + out: + if (str_addr && strlen(str_addr)) + silc_free(str_addr); + silc_free(ctx.str_regexp); + silc_buffer_free(ctx.rep); + + return ret >= 0 ? TRUE : FALSE; }