Author: Pekka Riikonen <priikone@silcnet.org>
- Copyright (C) 2007 Pekka Riikonen
+ Copyright (C) 2007 - 2008 Pekka Riikonen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
*/
-/****h* silcutil/SILC Regular Expression Interface
+/****h* silcutil/Regex Interface
*
* DESCRIPTION
*
* SILC regular expression interface provides Unix and POSIX compliant
- * regular expression compilation and matching. The syntax is compliant
- * with Unix and POSIX regular expression syntax.
+ * regular expression compilation and matching.
*
* The interface also provides many convenience functions to make the use
- * of regular expressions easier.
+ * of regular expressions easier. In particular, silc_regex provides a very
+ * simple way to match strings against regular expressions and get the
+ * exact match or matches in return. The silc_subst provides a simple and
+ * familiar way to match and substitute strings (Sed syntax).
+ *
+ * The regex syntax follows POSIX regex syntax:
+ *
+ * Expressions:
+ * ^ Match start of line/string
+ * '^a' matches 'ab' but not 'ba'
+ * $ Match end of line/string
+ * 'a$' matches 'ba' but not 'ab'
+ * . Match any single character (except new line (\n))
+ * '.a' matches 'ba' but not 'a'
+ * + Preceding item is matched one or more times
+ * 'a+b' matches 'aaab' but not 'b'
+ * * Preceding item is matched zero or more times
+ * 'a*b' matches 'ab', 'aab' and 'b'
+ * ? Preceding item is matched zero or one time
+ * 'ca?b' matches 'cb' and 'cab' but not 'caab'
+ * | Joins two expressions and matches either of them (OR)
+ * 'foo|bar' matches 'foo' or 'bar'
+ * {n} Preceding item is matched exactly n times (n can be 0-255)
+ * 'a{2}' matches 'aa' but not 'aaa'
+ * {n,} Preceding item is matched n or more times
+ * 'a{2,}' matches 'aa' and 'aaaa' but not 'a'
+ * {n,m} Preceding item is matched at least n times and at most m times
+ * 'a{2,4}' matches 'aa', 'aaa' and 'aaaa' but not 'aaaaa'
+ * [ ] Match any single character in the character list inside [ ]
+ * '[0123]' matches only '0', '1', '2' or '3'
+ * [ - ] Match any single character in the specified range
+ * '[0-5]' matches digits 0-5.
+ * [^ ] Match any character not in the character list or range
+ * '[^09]' matches any other character except '0' and '9'
+ * ( ) Subexpression, grouping
+ *
+ * Escaping (C-language style, '\' is written as '\\'):
+ * \\ Considers following character literal ('\\{' is '{')
+ * \\\\ Matches literal \
+ * \a Matches bell (BEL)
+ * \t Matches horizontal tab (HT)
+ * \n Matches new line (LF)
+ * \v Matches vertical tab (VT)
+ * \f Matches form feed (FF)
+ * \r Matches carriage ret (CR)
+ * \\< Match null string at the start of a word
+ * \\> Match null string at the end of a word
+ * \\b Match null string at the edge of a word
+ * \\B Match null string when not at the edge of a word
*
* EXAMPLE
*
* // Free the compiled regular expression
* silc_regex_free(&reg);
*
+ * // Simple match
+ * if (!silc_regex("foobar", "foo.", NULL))
+ * no_match;
+ *
+ * // Replace all foos with bar on all lines in the buffer
+ * silc_subst(buffer, "s/foo/bar/g");
+ *
***/
#ifndef SILCREGEX_H
#define SILCREGEX_H
-/****s* silcutil/SilcRegexAPI/SilcRegex
+/****s* silcutil/SilcRegex
*
* NAME
*
char anchor; /* anchor: 0=none 1=begline 2=begbuf */
} *SilcRegex, SilcRegexStruct;
-/****s* silcutil/SilcRegexAPI/SilcRegexMatch
+/****s* silcutil/SilcRegexMatch
*
* NAME
*
} *SilcRegexMatch, SilcRegexMatchStruct;
/***/
-/****d* silcutil/SilcRegexAPI/SilcRegexFlags
+/****d* silcutil/SilcRegexFlags
*
* NAME
*
} SilcRegexFlags;
/***/
-/****f* silcutil/SilcRegexAPI/silc_regex_compile
+/****f* silcutil/silc_regex_compile
*
* SYNOPSIS
*
SilcBool silc_regex_compile(SilcRegex regexp, const char *regex,
SilcRegexFlags flags);
-/****f* silcutil/SilcRegexAPI/silc_regex_compile
+/****f* silcutil/silc_regex_match
*
* SYNOPSIS
*
SilcUInt32 string_len, SilcUInt32 num_match,
SilcRegexMatch match, SilcRegexFlags flags);
-/****f* silcutil/SilcRegexAPI/silc_regex_free
+/****f* silcutil/silc_regex_free
*
* SYNOPSIS
*
***/
void silc_regex_free(SilcRegex regexp);
-/****f* silcutil/SilcRegexAPI/silc_regex
+/****f* silcutil/silc_regex
*
* SYNOPSIS
*
* The first (whole) match is returned to `match' buffer if it is non-NULL.
* The variable argument list consists of buffers where multiple matches are
* returned in case of group (parenthesized) regular expression. The caller
- * needs to know how many pointers to provide, in order to get all matches.
+ * needs to know how many pointers to provide in order to get all matches.
+ * If a particular group is optional, a buffer pointer must still be given
+ * as argument for it; if the group did not match, the returned buffer
+ * length is 0 and its data pointer is NULL.
+ *
* If `match' is non-NULL the variable argument list must be ended with
* NULL. The data in the `match' and in any other buffer is from `string'
* and must not be freed by the caller.
SilcBool silc_regex(const char *string, const char *regex,
SilcBuffer match, ...);
-/****f* silcutil/SilcRegexAPI/silc_regex_buffer
+/****f* silcutil/silc_regex_buffer
*
* SYNOPSIS
*
SilcBool silc_regex_buffer(SilcBuffer buffer, const char *regex,
SilcBuffer match, ...);
+/****f* silcutil/silc_subst
+ *
+ * SYNOPSIS
+ *
+ * SilcBool silc_subst(SilcBuffer buffer, const char *subst);
+ *
+ * DESCRIPTION
+ *
+ * Regular expression matching and substitution in `buffer' according
+ * to the substitution expression `subst'. This function provides
+ * Sed (Stream Editor) style substitution interface. The `subst' may
+ * be in one of the following formats:
+ *
+ * 's/REGEXP/REPLACEMENT/FLAGS'
+ *
+ * Matches regular expression REGEXP in each line in the buffer and
+ * substitutes the match with REPLACEMENT.
+ *
+ * 'ADDRs/REGEXP/REPLACEMENT/FLAGS'
+ *
+ * Selects lines in the buffer matching the address ADDR and matches the
+ * regular expression REGEXP in the line and substitutes the match with
+ * REPLACEMENT.
+ *
+ * The ADDR may be in one of the following formats:
+ *
+ * /REGEXP/ Matches only lines matching the regular expression
+ * NUMBER Matches only the specified line number (1-n)
+ * $ Matches only the last line
+ *
+ * The FLAGS may be one of the following:
+ *
+ * no FLAGS Finds first match in the line and replaces that
+ * g Finds and replaces all matches in the line
+ *
+ * An '!' may precede the 's'. In that case the ADDR is not matched.
+ *
+ * Returns TRUE if the match and replacement were done. Returns FALSE in
+ * case of error and sets silc_errno.
+ *
+ * If you need to match and/or replace '/' characters, they must be
+ * escaped with '\' (C-style escaping for '\' is '\\').
+ *
+ * If you need more versatile ways to modify the buffer you may consider
+ * using the SILC_STR_REGEX in SILC Buffer Format API directly. This
+ * function only provides basic matching and substitution.
+ *
+ * EXAMPLE
+ *
+ * // Replace all foos with bar on all lines in the buffer
+ * silc_subst(buffer, "s/foo/bar/g");
+ *
+ ***/
+SilcBool silc_subst(SilcBuffer buffer, const char *subst);
+
/* Backwards support: legacy name that expands to silc_regex().  Note that
   the argument order here is (regex, string), the reverse of silc_regex(),
   and that NULL is passed as the match buffer, so the result only tells
   whether `string' matched `regex'. */
#define silc_string_regex_match(regex, string) silc_regex(string, regex, NULL)