petidomo/libtext/transform_text.c

/*
 * Copyright (c) 1995-2013 Peter Simons <simons@cryp.to>
 * Copyright (c) 2000-2001 Cable & Wireless GmbH
 * Copyright (c) 1999-2000 CyberSolutions GmbH
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <sys/types.h>
#include <regex.h>
#include "text.h"

/* Number of regmatch_t slots handed to regexec() by text_transform_text():
   slot 0 receives the whole match, the remaining slots the parenthesised
   subexpressions. The default of 10 covers the back-references \0 through
   \9 accepted in the rule string. May be overridden at compile time. */
#ifndef MAX_TRANSFORM_ELEMENTS
#  define MAX_TRANSFORM_ELEMENTS 10
#endif

/* Do text-transformations using regular expressions.

   TransformText() is an easy interface to the regular expression
   routines included in most Unix kernels. It allows you to use text
   manipulation and replacing operations with a grace similar to
   sed(1) and perl(1).

   The regular expression language is described in great detail in
   the re_format(7) manual page (regex(7) on Linux).

   RETURNS: TransformText() will return one of the following codes,
   indicating the success or failure of the transformation:

   TEXT_REGEX_OK: Success.

   TEXT_REGEX_ERROR: This error occurs if TransformText() failed to
   compile the given regular expression or if the regular expression
   didn't specify any submatches -- which is syntactically correct,
   but useless for this routine.

   TEXT_REGEX_TRANSFORM_DIDNT_MATCH: This returncode indicates that
   the provided regular expression did not match the text buffer.

   EXAMPLE:

   The following call will remove all whitespace at the beginning and
   the end of the string contained in 'buf' and place the result back
   in the same variable:

       TransformText(buf, buf, "^[\t ]*(.*)[\t ]*$", "\\1");

   This practice is safe in this case, because the result string is
   guaranteed to be of equal length or shorter than the original. If
   this is not the case you must use a separate target buffer or you
   will mess your string and buffers up badly.

   AUTHOR: Peter Simons <simons@rhein.de>

 */

/*
 * Match 'regex' (POSIX extended syntax, case-insensitive) against
 * 'src_buffer' and, on success, expand 'rule' into 'dst_buffer'.
 *
 * Rule syntax:
 *   \0 .. \9   replaced by the text of the whole match (\0) or of the
 *              corresponding parenthesised subexpression; a group that
 *              did not participate in the match, or that lies outside
 *              the MAX_TRANSFORM_ELEMENTS slots, expands to nothing
 *   \\         a single literal backslash
 *   \c         any other escaped character c is copied as c
 *   a lone backslash at the very end of the rule is copied literally
 *   everything else is copied verbatim
 *
 * 'dst_buffer' is always NUL-terminated on success. No size is passed in,
 * so the caller must provide a buffer large enough for the expanded rule.
 * 'dst_buffer' may be the same buffer as 'src_buffer' only if the result
 * cannot be longer than the text consumed from the source so far.
 *
 * Returns:
 *   TEXT_REGEX_OK                     the rule was expanded into dst_buffer
 *   TEXT_REGEX_ERROR                  regex failed to compile, contains no
 *                                     subexpression, or regexec() failed
 *   TEXT_REGEX_TRANSFORM_DIDNT_MATCH  regex did not match src_buffer
 *
 * On any non-OK return 'dst_buffer' is left untouched.
 */
int
text_transform_text(char *          dst_buffer,   /* Where to save the resulting string. */
                    const char *    src_buffer,   /* Text to transform. */
                    const char *    regex,        /* Regex to describe what matches. */
                    const char *    rule)         /* How the result should look. */
{
    regex_t       preg;
    regmatch_t    pmatch[MAX_TRANSFORM_ELEMENTS];
    const char *  rule_p;
    char *        dst_p;
    regoff_t      pos;
    unsigned int  group;
    int           rc;

    /* Compile the regular expression. */

    rc = regcomp(&preg, regex, REG_EXTENDED | REG_ICASE);
    if (rc != 0) {
        /* The contents of preg are undefined after a failed regcomp(),
           so it must not be passed to regfree(). */
        return TEXT_REGEX_ERROR;
    }
    if (preg.re_nsub == 0) {
        /* Valid pattern, but without a subexpression there is nothing
           a rule could refer to. */
        regfree(&preg);
        return TEXT_REGEX_ERROR;
    }

    /* Build the matching array. The offsets in pmatch[] do not depend on
       the compiled pattern, so it can be released right away. */

    rc = regexec(&preg, src_buffer, MAX_TRANSFORM_ELEMENTS, pmatch, 0);
    regfree(&preg);
    if (rc == REG_NOMATCH)
        return TEXT_REGEX_TRANSFORM_DIDNT_MATCH;
    if (rc != 0)
        return TEXT_REGEX_ERROR;

    /* Do the transformation. The terminator is tested before anything is
       copied, so an empty rule simply yields an empty result. */

    dst_p = dst_buffer;
    rule_p = rule;
    while (*rule_p != '\0') {
        if (*rule_p != '\\') {
            *dst_p++ = *rule_p++;       /* Ordinary character. */
            continue;
        }

        rule_p++;                       /* Skip the backslash. */

        if (*rule_p == '\0') {
            /* Lone backslash at the end of the rule: keep it and stop
               here instead of stepping over the terminator. */
            *dst_p++ = '\\';
            break;
        }

        if (*rule_p >= '0' && *rule_p <= '9') {
            /* Substitute the appropriate match. regexec() marks unused
               slots with rm_so == -1; those expand to nothing. */
            group = (unsigned int) (*rule_p - '0');
            if (group < MAX_TRANSFORM_ELEMENTS && pmatch[group].rm_so >= 0) {
                for (pos = pmatch[group].rm_so; pos < pmatch[group].rm_eo; pos++)
                    *dst_p++ = src_buffer[pos];
            }
            rule_p++;
        }
        else {
            /* "\\" and unknown sequences: copy the escaped character. */
            *dst_p++ = *rule_p++;
        }
    }
    *dst_p = '\0';                      /* Terminate string. */

    return TEXT_REGEX_OK;
}