2000-12-13 13:19:03 +00:00
|
|
|
/*
|
2010-02-24 16:01:14 +00:00
|
|
|
* Copyright (c) 1995-2010 Peter Simons <simons@cryp.to>
|
|
|
|
|
* Copyright (c) 2000-2001 Cable & Wireless GmbH
|
|
|
|
|
* Copyright (c) 1999-2000 CyberSolutions GmbH
|
|
|
|
|
*
|
|
|
|
|
* This program is free software: you can redistribute it and/or modify it under
|
|
|
|
|
* the terms of the GNU General Public License as published by the Free Software
|
|
|
|
|
* Foundation, either version 3 of the License, or (at your option) any later
|
|
|
|
|
* version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
|
|
|
* details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along with
|
|
|
|
|
* this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
2000-12-13 13:19:03 +00:00
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
#include <regex.h>
|
|
|
|
|
#include "text.h"
|
|
|
|
|
|
|
|
|
|
/* Number of regmatch_t slots that text_transform_text() hands to regexec():
   one for the whole match plus one per parenthesised sub-expression.  The
   #ifndef guard lets a build predefine a different value. */
#ifndef MAX_TRANSFORM_ELEMENTS
|
|
|
|
|
# define MAX_TRANSFORM_ELEMENTS 10
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/* Do text-transformations using regular expressions.
|
|
|
|
|
|
|
|
|
|
TransformText() is an easy interface to the regular expression
|
|
|
|
|
routines included in most Unix kernels. It allows you to use text
|
|
|
|
|
manipulation and replacing operations with a grace similar to
|
|
|
|
|
sed(1) and perl(1).
|
|
|
|
|
|
|
|
|
|
The regular expression language is described in the re_format(2)
|
|
|
|
|
man file in great detail.
|
|
|
|
|
|
|
|
|
|
RETURNS: TransformText() will return one of the following codes,
|
|
|
|
|
indicating the success or failure of the transformation:
|
|
|
|
|
|
|
|
|
|
TEXT_REGEX_OK: Success.
|
|
|
|
|
|
|
|
|
|
TEXT_REGEX_ERROR: This error occurs if TransformText() failed to
|
|
|
|
|
compile the given regular expression or if the regular expression
|
|
|
|
|
didn't specify any submatches -- what is syntactically correct, but
|
|
|
|
|
useless for this routine.
|
|
|
|
|
|
|
|
|
|
TEXT_REGEX_TRANSFORM_DIDNT_MATCH: This returncode indicates that
|
|
|
|
|
the provided regular expression did not match the text buffer.
|
|
|
|
|
|
|
|
|
|
EXAMPLE:
|
|
|
|
|
|
|
|
|
|
The following call will remove all whitespace at the begining and
|
|
|
|
|
the end of the string contained in 'buf' and place the result back
|
|
|
|
|
in the same variable:
|
|
|
|
|
|
|
|
|
|
TransformText(buf, buf, "^[\t ]*(.*)[\t ]*$", "\\\\1");
|
|
|
|
|
|
|
|
|
|
This practice is safe in this case, because the result string is
|
|
|
|
|
guaranteed to be of equal length of shorter than the original. If
|
|
|
|
|
this is not the case you must use a seperate target buffer or you
|
|
|
|
|
will mess your string and buffers up badly.
|
|
|
|
|
|
|
|
|
|
AUTHOR: Peter Simons <simons@rhein.de>
|
|
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
text_transform_text(char * dst_buffer, /* Where to save the resulting string. */
|
|
|
|
|
const char * src_buffer, /* Text to transform. */
|
|
|
|
|
const char * regex, /* Regex to describe what matches. */
|
|
|
|
|
const char * rule) /* How the result should look. */
|
|
|
|
|
{
|
|
|
|
|
regex_t preg;
|
|
|
|
|
regmatch_t pmatch[MAX_TRANSFORM_ELEMENTS];
|
|
|
|
|
char error_msg[256];
|
|
|
|
|
int rc;
|
|
|
|
|
unsigned int i, j;
|
|
|
|
|
const char * src_p;
|
|
|
|
|
char * dst_p;
|
|
|
|
|
|
|
|
|
|
/* Compile the regular expression. */
|
|
|
|
|
|
|
|
|
|
rc = regcomp(&preg, regex, REG_EXTENDED | REG_ICASE);
|
|
|
|
|
if (rc != 0) {
|
|
|
|
|
regfree(&preg);
|
|
|
|
|
return TEXT_REGEX_ERROR;
|
|
|
|
|
}
|
|
|
|
|
if (preg.re_nsub <= 0) {
|
|
|
|
|
regfree(&preg);
|
|
|
|
|
return TEXT_REGEX_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Build the matching array. */
|
|
|
|
|
|
|
|
|
|
rc = regexec(&preg, src_buffer, MAX_TRANSFORM_ELEMENTS, pmatch, 0);
|
|
|
|
|
if (rc != 0) {
|
|
|
|
|
if (rc == REG_NOMATCH) {
|
|
|
|
|
regfree(&preg);
|
|
|
|
|
return TEXT_REGEX_TRANSFORM_DIDNT_MATCH;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
regerror(rc, &preg, error_msg, (size_t) sizeof(error_msg));
|
|
|
|
|
regfree(&preg);
|
|
|
|
|
return TEXT_REGEX_ERROR;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Do the transformation. */
|
|
|
|
|
|
|
|
|
|
src_p = rule;
|
|
|
|
|
dst_p = dst_buffer;
|
|
|
|
|
do {
|
|
|
|
|
switch (*src_p) {
|
|
|
|
|
case '\\': /* Handle backslash squences. */
|
|
|
|
|
src_p++;
|
|
|
|
|
switch (*src_p) {
|
|
|
|
|
case '0': case '1': case '2':
|
|
|
|
|
case '3': case '4': case '5':
|
|
|
|
|
case '6': case '7': case '8':
|
|
|
|
|
case '9': /* Substitute appropriate match. */
|
|
|
|
|
i = *src_p - '0';
|
|
|
|
|
for (j = pmatch[i].rm_so; j < pmatch[i].rm_eo; j++)
|
|
|
|
|
*dst_p++ = src_buffer[j];
|
|
|
|
|
src_p++;
|
|
|
|
|
break;
|
|
|
|
|
case '\\': /* Copy bashslash verbatim. */
|
|
|
|
|
*dst_p++ = *src_p++;
|
|
|
|
|
break;
|
|
|
|
|
default: /* Copy verbatim and warn about unknown sequence. */
|
|
|
|
|
*dst_p++ = *src_p++;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
*dst_p++ = *src_p++;
|
|
|
|
|
}
|
|
|
|
|
} while (*src_p != '\0');
|
|
|
|
|
*dst_p = '\0'; /* Terminate string. */
|
|
|
|
|
|
|
|
|
|
regfree(&preg);
|
|
|
|
|
|
|
|
|
|
return TEXT_REGEX_OK;
|
|
|
|
|
}
|