petidomo/libtext/transform_text.c

/*
   $Source$
   $Revision$

   Copyright (C) 2000 by CyberSolutions GmbH, Germany.

   This file is part of OpenPetidomo.

   OpenPetidomo is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   OpenPetidomo is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with OpenPetidomo; see the file COPYING. If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.
*/

#include <sys/types.h>
#include <regex.h>
#include "text.h"

#ifndef MAX_TRANSFORM_ELEMENTS
#  define MAX_TRANSFORM_ELEMENTS 10
#endif

/* Do text-transformations using regular expressions.

   text_transform_text() is an easy interface to the POSIX regular
   expression routines (regcomp(3), regexec(3)) found in the C library
   of most Unix systems. It allows you to use text manipulation and
   replacing operations with a grace similar to sed(1) and perl(1).

   The pattern is compiled as an extended regular expression and is
   matched case-insensitively (REG_EXTENDED | REG_ICASE). The regular
   expression language is described in great detail in the
   re_format(7) or regex(7) manual page.

   The rule is copied to the destination buffer, with the following
   backslash sequences being replaced on the way:

   \0        The text matched by the whole regular expression.

   \1 .. \9  The text matched by the corresponding parenthesized
             sub-expression. A sub-expression that did not take part
             in the match expands to nothing.

   \\        A single backslash.

   \c        Any other character 'c' is copied verbatim; the
             backslash itself is dropped. A lone backslash at the very
             end of the rule is ignored.

   The destination buffer carries no size information: the caller
   must make sure that it is large enough to hold the expanded rule
   plus the terminating '\0'. It is only written to if the regular
   expression matched.

   RETURNS: text_transform_text() will return one of the following
   codes, indicating the success or failure of the transformation:

   TEXT_REGEX_OK: Success.

   TEXT_REGEX_ERROR: This error occurs if text_transform_text() failed
   to compile or execute the given regular expression, or if the
   regular expression didn't specify any sub-expressions -- which is
   syntactically correct, but useless for this routine.

   TEXT_REGEX_TRANSFORM_DIDNT_MATCH: This return code indicates that
   the provided regular expression did not match the text buffer.

   EXAMPLE:

   The following call will remove all whitespace at the beginning of
   the string contained in 'buf' and place the result back in the
   same variable:

       text_transform_text(buf, buf, "^[\t ]*(.*)$", "\\1");

   Using the same buffer as source and destination is safe in this
   case, because the result is written front to back and never gets
   ahead of the text that is still to be read. If this is not the
   case you must use a separate target buffer or you will mess your
   string and buffers up badly.

   AUTHOR: Peter Simons <simons@rhein.de>

 */

int
text_transform_text(char *          dst_buffer,   /* Where to save the resulting string. */
		    const char *    src_buffer,   /* Text to transform. */
		    const char *    regex,        /* Regex to describe what matches. */
		    const char *    rule)         /* How the result should look. */
{
    regex_t       preg;
    regmatch_t    pmatch[MAX_TRANSFORM_ELEMENTS];
    int           rc;
    size_t        group;
    regoff_t      pos;
    const char *  rule_p;
    char *        dst_p;

    /* Compile the regular expression. After a failed regcomp() the
       contents of 'preg' are undefined, so it must not be passed to
       regfree(). */

    if (regcomp(&preg, regex, REG_EXTENDED | REG_ICASE) != 0)
	return TEXT_REGEX_ERROR;

    if (preg.re_nsub == 0) {
	regfree(&preg);
	return TEXT_REGEX_ERROR;
    }

    /* Build the matching array. It contains nothing but offsets into
       the source buffer, so the compiled expression can be released
       right away, no matter how the match turned out. */

    rc = regexec(&preg, src_buffer, sizeof(pmatch) / sizeof(pmatch[0]), pmatch, 0);
    regfree(&preg);
    if (rc == REG_NOMATCH)
	return TEXT_REGEX_TRANSFORM_DIDNT_MATCH;
    if (rc != 0)
	return TEXT_REGEX_ERROR;

    /* Do the transformation. The terminator is tested before every
       read, so an empty rule or a rule ending in a backslash never
       makes us look beyond the end of the string. */

    dst_p = dst_buffer;
    rule_p = rule;
    while (*rule_p != '\0') {
	if (*rule_p != '\\') {	/* Ordinary character. */
	    *dst_p++ = *rule_p++;
	    continue;
	}

	rule_p++;		/* Skip the backslash itself. */
	if (*rule_p == '\0')
	    break;		/* Lone backslash at the end of the rule. */

	if (*rule_p >= '0' && *rule_p <= '9') {

	    /* Substitute the appropriate match. Groups beyond the
	       matching array and groups that did not take part in the
	       match (rm_so == -1) expand to nothing. The bytes are
	       copied one by one in ascending order on purpose: the
	       destination may be the source buffer itself. */

	    group = (size_t) (*rule_p - '0');
	    if (group < sizeof(pmatch) / sizeof(pmatch[0]) && pmatch[group].rm_so >= 0) {
		for (pos = pmatch[group].rm_so; pos < pmatch[group].rm_eo; pos++)
		    *dst_p++ = src_buffer[pos];
	    }
	    rule_p++;
	}
	else {

	    /* An escaped backslash or an unknown sequence: copy the
	       character following the backslash verbatim. */

	    *dst_p++ = *rule_p++;
	}
    }
    *dst_p = '\0';		/* Terminate string. */

    return TEXT_REGEX_OK;
}
Initial revision 2000-12-13 13:19:03 +00:00			`/*`
Added correct GPL copyright headers. 2000-12-13 17:37:56 +00:00			$Source$
			$Revision$

			`Copyright (C) 2000 by CyberSolutions GmbH, Germany.`

			`This file is part of OpenPetidomo.`

			`OpenPetidomo is free software; you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation; either version 2, or (at your option)`
			`any later version.`

			`OpenPetidomo is distributed in the hope that it will be useful, but`
			`WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with OpenPetidomo; see the file COPYING. If not, write to`
			`the Free Software Foundation, Inc., 59 Temple Place - Suite 330,`
			`Boston, MA 02111-1307, USA.`
			`*/`
Initial revision 2000-12-13 13:19:03 +00:00
			`#include <sys/types.h>`
			`#include <regex.h>`
			`#include "text.h"`

			`#ifndef MAX_TRANSFORM_ELEMENTS`
			`# define MAX_TRANSFORM_ELEMENTS 10`
			`#endif`

			`/* Do text-transformations using regular expressions.`

			`TransformText() is an easy interface to the regular expression`
			`routines included in most Unix kernels. It allows you to use text`
			`manipulation and replacing operations with a grace similar to`
			`sed(1) and perl(1).`

			`The regular expression language is described in the re_format(2)`
			`man file in great detail.`

			`RETURNS: TransformText() will return one of the following codes,`
			`indicating the success or failure of the transformation:`

			`TEXT_REGEX_OK: Success.`

			`TEXT_REGEX_ERROR: This error occurs if TransformText() failed to`
			`compile the given regular expression or if the regular expression`
			`didn't specify any submatches -- what is syntactically correct, but`
			`useless for this routine.`

			`TEXT_REGEX_TRANSFORM_DIDNT_MATCH: This returncode indicates that`
			`the provided regular expression did not match the text buffer.`

			`EXAMPLE:`

			`The following call will remove all whitespace at the begining and`
			`the end of the string contained in 'buf' and place the result back`
			`in the same variable:`

			`TransformText(buf, buf, "^[\t ](.)[\t ]*$", "\\\\1");`

			`This practice is safe in this case, because the result string is`
			`guaranteed to be of equal length of shorter than the original. If`
			`this is not the case you must use a seperate target buffer or you`
			`will mess your string and buffers up badly.`

			`AUTHOR: Peter Simons <simons@rhein.de>`

			`*/`

			`int`
			`text_transform_text(char * dst_buffer, /* Where to save the resulting string. */`
			`const char * src_buffer, /* Text to transform. */`
			`const char * regex, /* Regex to describe what matches. */`
			`const char * rule) /* How the result should look. */`
			`{`
			`regex_t preg;`
			`regmatch_t pmatch[MAX_TRANSFORM_ELEMENTS];`
			`char error_msg[256];`
			`int rc;`
			`unsigned int i, j;`
			`const char * src_p;`
			`char * dst_p;`

			`/* Compile the regular expression. */`

			`rc = regcomp(&preg, regex, REG_EXTENDED \| REG_ICASE);`
			`if (rc != 0) {`
			`regfree(&preg);`
			`return TEXT_REGEX_ERROR;`
			`}`
			`if (preg.re_nsub <= 0) {`
			`regfree(&preg);`
			`return TEXT_REGEX_ERROR;`
			`}`

			`/* Build the matching array. */`

			`rc = regexec(&preg, src_buffer, MAX_TRANSFORM_ELEMENTS, pmatch, 0);`
			`if (rc != 0) {`
			`if (rc == REG_NOMATCH) {`
			`regfree(&preg);`
			`return TEXT_REGEX_TRANSFORM_DIDNT_MATCH;`
			`}`
			`else {`
			`regerror(rc, &preg, error_msg, (size_t) sizeof(error_msg));`
			`regfree(&preg);`
			`return TEXT_REGEX_ERROR;`
			`}`
			`}`

			`/* Do the transformation. */`

			`src_p = rule;`
			`dst_p = dst_buffer;`
			`do {`
			`switch (*src_p) {`
			`case '\\': /* Handle backslash squences. */`
			`src_p++;`
			`switch (*src_p) {`
			`case '0': case '1': case '2':`
			`case '3': case '4': case '5':`
			`case '6': case '7': case '8':`
			`case '9': /* Substitute appropriate match. */`
			`i = *src_p - '0';`
			`for (j = pmatch[i].rm_so; j < pmatch[i].rm_eo; j++)`
			`*dst_p++ = src_buffer[j];`
			`src_p++;`
			`break;`
			`case '\\': /* Copy bashslash verbatim. */`
			`dst_p++ = src_p++;`
			`break;`
			`default: /* Copy verbatim and warn about unknown sequence. */`
			`dst_p++ = src_p++;`
			`}`
			`break;`
			`default:`
			`dst_p++ = src_p++;`
			`}`
			`} while (*src_p != '\0');`
			`dst_p = '\0'; / Terminate string. */`

			`regfree(&preg);`

			`return TEXT_REGEX_OK;`
			`}`