Imported latest version.

This commit is contained in:
Peter Simons 2000-12-13 15:45:25 +00:00
parent 7d19d78886
commit 11bb6bc1ca
23 changed files with 2172 additions and 204 deletions

View File

@ -1,69 +0,0 @@
#
# liblists Makefile
#
# $Header$
#
# Builds the static archive liblists.a from lists.c.  The "man" target
# generates the manual page with c2man.
#
# Make Rules:
# ===========
#
.c.o:
	$(CC) $(CFLAGS) $(CPPFLAGS) -c $<
# Compiler Flags:
# ===============
#
CFLAGS	= -Wall
CPPFLAGS=
#
# Tools:
# ======
#
# GNU make predefines AR but not RANLIB.  Without a default, the archive
# rule would expand "$(RANLIB) $@" to just the archive name and try to
# execute it.  "?=" keeps a value that is already set (environment, or a
# make implementation that provides its own default).
RANLIB	?= ranlib
#
# Labels:
# =======
#
SRCS	= lists.c
OBJS	= $(SRCS:.c=.o)
MANFILES= $(SRCS:.c=.3)
#
# Targets
#
.PHONY: all man clean realclean distclean depend
all:	liblists.a
man:	InitList.3
clean:
	rm -f liblists.a *.o *.3 *.core
realclean: clean
	rm -rf man3
distclean: realclean
depend:
	makedepend -Y /usr/include $(SRCS)
	@rm -f Makefile.bak
InitList.3: lists.c
	c2man -ilists.h -g lists.c
#
# Actions
#
# The archive is removed first so that no stale members survive.
liblists.a: $(OBJS)
	rm -f $@
	$(AR) cr $@ $(OBJS)
	$(RANLIB) $@
#
# Dependencies
#
# DO NOT DELETE
lists.o: lists.h

15
liblists/SMakefile Normal file
View File

@ -0,0 +1,15 @@
#
# Skeleton Makefile -- process with smake to create real Makefile.
#
# $Header$
#
# NOTE(review): ".opt -o" presumably tells smake to write its output to
# GNUmakefile.in -- confirm against the smake documentation.
.opt -o GNUmakefile.in
# Library name and the sources it is built from.
# NOTE(review): presumably consumed by the included library.smk -- confirm.
LIBRARY = lists
SRCS = lists.c
.include <library.smk>
.include <autoconf.base.smk>
# Additional distclean step: delete lists.h.
# NOTE(review): lists.h looks like a configure-generated file (configure.in
# names it in AC_CONFIG_HEADER) -- confirm.
distclean-local::
rm -f lists.h

33
liblists/configure.in Normal file
View File

@ -0,0 +1,33 @@
dnl
dnl configure.in -- Process this file with autoconf to produce a configure script.
dnl
dnl $Header$
dnl
AC_INIT(lists.h.in)
dnl Checks for paths and programs.
dnl
AC_PROG_CC
AC_PROG_RANLIB
dnl Check for optional header files.
dnl
AC_CHECK_HEADERS(strings.h)
dnl Enable warning flags for gcc.
dnl
if test "$GCC" = yes; then
    CFLAGS="$CFLAGS -Wall -pedantic"
    CXXFLAGS="$CXXFLAGS -Wall -pedantic"
fi
dnl Remove '-g' and '-O2' from the compile flags.
dnl
dnl Only flags that are exactly -g or -O2 are dropped.  A plain textual
dnl substitution of "-g" would also damage longer options that merely
dnl contain that text (for example -ggdb or -fno-gcse).
dnl
lists_kept=
for lists_flag in $CFLAGS; do
    case $lists_flag in
        -g|-O2) ;;
        *) lists_kept="$lists_kept $lists_flag" ;;
    esac
done
CFLAGS=$lists_kept
lists_kept=
for lists_flag in $CXXFLAGS; do
    case $lists_flag in
        -g|-O2) ;;
        *) lists_kept="$lists_kept $lists_flag" ;;
    esac
done
CXXFLAGS=$lists_kept
dnl Write results.
dnl
AC_CONFIG_HEADER(lists.h)
AC_OUTPUT(GNUmakefile)

View File

@ -1,10 +1,39 @@
/*
* $Source$
* $Revision$
* $Date$
* $Source$
* $Revision$
* $Date$
*
* Copyright (C) 1996,97 by CyberSolutions GmbH.
* All rights reserved.
* Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Peter Simons.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "lists.h"
@ -114,7 +143,7 @@ isEndOfList(const Node node)
assert(node != NULL);
if (node == NULL)
return NULL;
return 1;
return (node->ln_Next == NULL);
}
@ -418,7 +447,7 @@ CountElements(List head)
assert(head != NULL);
if (head == NULL)
return NULL;
return 0;
for (i = 0, node = getFirstNode(head); node != NULL; node = getNextNode(node))
i++;

View File

@ -1,15 +1,47 @@
/*
* $Source$
* $Revision$
* $Date$
* $Source$
* $Revision$
* $Date$
*
* Copyright (C) 1996,97 by CyberSolutions GmbH.
* All rights reserved.
* Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Peter Simons.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LIB_LISTS_H__
#define __LIB_LISTS_H__ 1
/* For autoconf... */
#undef HAVE_STRINGS_H
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_STRINGS_H /* sunos 4 needs this */
@ -22,6 +54,7 @@
/********** Useful defines and declarations **********/
#ifndef __cplusplus
#ifndef __HAVE_DEFINED_BOOL__
# define __HAVE_DEFINED_BOOL__ 1
typedef int bool;
@ -32,6 +65,7 @@ typedef int bool;
#ifndef TRUE
# define TRUE (1==1)
#endif
#endif
/********** Structures **********/

View File

@ -0,0 +1,59 @@
#include "text.hpp"
#include <fstream.h>
/**
Example using the Custom Mode of the TokenIterator
*/
class MyCustomTokenIterator: public TokenIterator{
public:
MyCustomTokenIterator(string inputStr, bool b=false)
: TokenIterator(inputStr,TokenIterator::Custom, b){
eoltoken= '\n';
separator= ":\n";
whitespace= "";
};
MyCustomTokenIterator(istream &inputStr, bool b=false)
: TokenIterator(inputStr,TokenIterator::Custom, b){
eoltoken= '\n';
separator= ":\n";
whitespace= "";
};
};
int main(int argc, char* argv[]){
ifstream infile("/etc/passwd");
MyCustomTokenIterator tokenize(infile);
while(!infile.eof()){
string user= tokenize();
string password= tokenize();
string userid = tokenize();
string groupid= tokenize();
string description= tokenize();
string home = tokenize();
string shell= tokenize();
if(password=="x")
password="shadowed";
cout << "----"<<endl;
cout << "user :" <<user <<endl;
cout << "password :" <<password <<endl;
cout << "userid :" <<userid <<endl;
cout << "groupid :" <<groupid <<endl;
cout << "description:" <<description <<endl;
cout << "home :" <<home <<endl;
cout << "shell :" <<shell <<endl;
while( !infile.eof() && tokenize.thesep!= tokenize.eolToken() )
{
string trailing_garbage = tokenize();
cout<<"\\:"<< trailing_garbage;
}
cout<<endl;
}
return 0;
}

View File

@ -1,87 +0,0 @@
#
# Text routine library
#
# $Header$
#
# Builds the static archive libtext.a.  The "man" target generates one
# manual page per source file with c2man.
#
# Make Rules:
# ===========
#
.SUFFIXES: .3
.c.o:
	$(CC) $(CFLAGS) $(CPPFLAGS) -c $<
# Compiler flags:
# ===============
#
CFLAGS	= -Wall
CPPFLAGS=
# Linker flags:
# =============
#
LDFLAGS	=
LIBS	=
#
# Tools:
# ======
#
# GNU make predefines AR but not RANLIB.  Without a default, the archive
# rule would expand "$(RANLIB) $@" to just the archive name and try to
# execute it.  "?=" keeps a value that is already set.
RANLIB	?= ranlib
#
# Labels:
# =======
#
SRCS	= easy_pattern_match.c find_next_line.c find_string.c easy_sprintf.c \
	  transform_text.c wordwrap.c
OBJS	= $(SRCS:.c=.o)
MANFILES= text_easy_pattern_match.3 text_find_string.3 text_transform_text.3 \
	  text_find_next_line.3 text_easy_sprintf.3 text_wordwrap.3
#
# Targets
#
.PHONY: all man clean depend
all:	libtext.a
man:	$(MANFILES)
clean:
	rm -f libtext.a test $(OBJS)
	rm -f *.o *.3 *.core *.bak
depend:
	makedepend -Y /usr/include $(SRCS)
	@rm -f Makefile.bak
#
# Actions:
#=========
#
# The archive is removed first so that members of sources no longer listed
# in SRCS do not linger in it.
libtext.a: $(OBJS)
	rm -f $@
	$(AR) cr $@ $(OBJS)
	$(RANLIB) $@
text_easy_pattern_match.3: easy_pattern_match.c text.h
	c2man -itext.h easy_pattern_match.c
text_find_string.3: find_string.c text.h
	c2man -itext.h find_string.c
text_transform_text.3: transform_text.c text.h
	c2man -itext.h transform_text.c
text_find_next_line.3: find_next_line.c text.h
	c2man -itext.h find_next_line.c
text_easy_sprintf.3: easy_sprintf.c text.h
	c2man -itext.h easy_sprintf.c
text_wordwrap.3: wordwrap.c text.h
	c2man -itext.h wordwrap.c
#
# Dependencies
#

259
libtext/RegexTokenizer.cpp Normal file
View File

@ -0,0 +1,259 @@
/*
* $Source$
* $Revision$
* $Date$
*
* Copyright (c) 1999 by CyberSolutions GmbH, Germany.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by CyberSolutions GmbH.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "text.hpp"
// Scratch buffer that advance() fills with the next slice of the input and
// hands to regexec(). It holds N_substring characters plus one extra byte.
char RegexTokenizer::workspace[RegexTokenizer::N_substring+1]="";
// Default constructor: a tokenizer without input and without patterns.
// reset() gives the position and error members defined values. They were
// previously left uninitialised, so advance() on such an object read
// indeterminate values.
RegexTokenizer::RegexTokenizer(){
    reset();
}
// Puts the tokenizer back into its initial state: empty input, every match
// slot marked "no match" (-1), scan position at 0, no error.
void RegexTokenizer::reset(){
    input= string();
    // Cover all N_pm slots including index 0, which holds the whole match.
    // The previous countdown loop stopped before reaching it.
    for(int i=0; i<N_pm; ++i){
        pm[i].rm_so= -1;
        pm[i].rm_eo= -1;
    }
    so= 0;
    eo= 0;
    previous_eo= -1;   // -1 < eo lets the first advance() proceed
    error= 0;
}
// Resets the tokenizer, stores the new input and compiles every pattern in
// _regex, in order.
//
// Returns 0 when all patterns compiled. Otherwise returns the non-zero
// regcomp() error code of the first pattern that failed; patterns before it
// stay registered and later ones are not compiled.
int RegexTokenizer::set(string _input,list<const char*> _regex){
    reset();
    input= _input;
    list<const char*>::iterator first= _regex.begin();
    list<const char*>::iterator last = _regex.end();
    while(first!=last){
        regex_t re;
        int i;
        //REG_EXTENDED
        //use extended regular expressions
        //REG_NEWLINE
        //makes ^...$ work to match newline/endofline
        i= regcomp (&re, *first, REG_EXTENDED|REG_NEWLINE);
        if(i)
            return i;
        regex.push_back(re);
        regex_src.push_back(*first);
        ++first;
    }
    // Success. The function previously fell off the end here, leaving the
    // return value undefined.
    return 0;
}
// Builds a tokenizer for one of the predefined modes.
//   Word: a run of non-blank characters followed by a run of blanks
//   Line: one line up to and including its newline
//   RFC:  one line plus any following lines that start with space or tab
//         (the whitespace that glues the lines together is not removed)
// Any other mode, Custom included, prints a diagnostic to cerr and leaves
// the tokenizer without a pattern.
RegexTokenizer::RegexTokenizer(string _input,Mode _mode){
    mode= _mode;
    const char* pattern= 0;
    switch(_mode){
    case Word:
        pattern= "([^ \t\n]*)([ \t\n]*)";
        break;
    case Line:
        pattern= "^(.*)$\n";
        break;
    case RFC:
        pattern= "((^.*$)((\n)^[ \t]+.*$)*)(\n)?";
        break;
    case Custom:
    default:
        cerr<<"RegexTokenizer mode constructor called with pointless mode."<<endl;
    }
    list<const char*> patterns;
    if(pattern)
        patterns.push_back(pattern);
    set(_input,patterns);
}
// Builds a tokenizer that matches the input against a single pattern.
RegexTokenizer::RegexTokenizer(string _input,const char* oneregex){
    // set() takes a list, so wrap the pattern in a one-element list.
    list<const char*> single(1, oneregex);
    set(_input,single);
}
// Builds a tokenizer that tries each pattern of _regex in list order.
// The result of set() is not examined here.
RegexTokenizer::RegexTokenizer(string _input,list<const char*> _regex){
    this->set(_input,_regex);
}
// Copy constructor. The patterns are recompiled from their source text via
// set(), so each object owns its own regex_t instances. The match state is
// then copied from r.
RegexTokenizer::RegexTokenizer(const RegexTokenizer &r){
    //cerr<<"(copy constructor)"<<endl;
    set(r.input,r.regex_src);
    // Arrays cannot be assigned as a whole, and result holds string
    // objects, which must not be copied bytewise (memcpy would leave two
    // strings sharing one buffer). Copy element by element instead.
    for(int k=0; k<N_pm; ++k)
        result[k]= r.result[k];
    whichregexwasmatched= r.whichregexwasmatched;
    // pm holds plain regmatch_t offsets, so a raw copy is fine here.
    memcpy(&pm[0], &r.pm[0], N_pm*sizeof(pm[0]) );
    I_pm= r.I_pm;
    error= r.error;
    so= r.so;
    eo= r.eo;
    previous_eo= r.previous_eo;
    mode= r.mode;
}
// Returns a copy of this tokenizer rewound to the start of the input:
// error cleared, positions at 0, previous_eo at -1.
RegexTokenizer RegexTokenizer::begin() const
{
    //cerr<<"(begin)"<<endl;
    RegexTokenizer start(*this);
    start.previous_eo= -1;
    start.eo= 0;
    start.so= 0;
    start.error= 0;
    return start;
}
// Returns a copy of this tokenizer in the "finished" state: error set to 1
// and all three positions at the length of the input.
RegexTokenizer RegexTokenizer::end() const
{
    //cerr<<"(end)"<<endl;
    RegexTokenizer finish(*this);
    finish.error= 1;
    finish.so= input.length();
    finish.eo= input.length();
    finish.previous_eo= finish.eo;
    return finish;
}
void RegexTokenizer::advance(){
//try all patterns until one matches
//cerr<<"advance"<<endl;
//wonder where to get the string from ?
//using a char * buffer is ugly, but there is no regex for string
// (no regex stuff which I'm aware of at the time of writing (1999) )
if(eo < (signed int)input.size()){
// there is no c_substr(eo,N_substring) ;-(
string sWorkspace(input,eo,N_substring);
// waste of time, but I´m not sure when sWorkspace.c_str() gets freed;
strncpy(workspace, sWorkspace.c_str(), N_substring) ;
}
else
workspace[0]='\0';
result[0]= string();
if(
error == 0 && /* regex ok ? */
*workspace != 0 && /* check end buffer */
previous_eo < eo /* make sure we finish */
)
{/* while matches found */
//cerr<<"go over regex's supplied"<<endl;
list<regex_t>::iterator first= regex.begin();
list<regex_t>::iterator last = regex.end();
error= 1;
previous_eo= eo;
while(error && result[0].empty() && first!=last){//check for empty buffer
{
//cerr<<endl <<"matching "<< workspace + eo<< endl;
/* substring found between pm.rm_so and pm.rm_eo */
/* This call to regexec() finds the next match */
error = regexec(&*first, workspace, N_pm, &pm[0], 0);
++first;
}
if(!error){
int final_so= eo;
int final_eo= eo;
//Go over the members of pm to see submatches
int i;
i=N_pm; while(--i>0){ result[i]= string(); }
i=0;
while(i<N_pm &&
pm[i].rm_so>=0 && pm[i].rm_eo>0 &&
pm[i].rm_so<N_substring && pm[i].rm_eo<=N_substring
){
int local_so= previous_eo+pm[i].rm_so;
int local_eo= previous_eo+pm[i].rm_eo;
if(i==0)
{
final_so= local_so;
final_eo= local_eo;
}
result[i]= input.substr(local_so, local_eo-local_so);
//cout <<"match["<<i<<"]{"<<pm[i].rm_so<<","<<pm[i].rm_eo<<"}";
//cout <<"("<< local_so <<","<< local_eo <<"): " << result[i] << endl;
i++;
}
so= final_so;
eo= final_eo;
I_pm= i;
}
else{
(void)regerror(error, &*first, workspace, N_substring);
}
}
}else{
//if the final match has been passed,
//signal end (to make != operator work ?PS)
// like in *this= end();
so= input.length();
eo= input.length();
previous_eo= eo;
}
}
// Destructor: releases every compiled pattern held in the regex list.
RegexTokenizer::~RegexTokenizer(){
    for(list<regex_t>::iterator it= regex.begin(); it!=regex.end(); ++it){
        //cerr<<"freeing "<<&*it<<endl;
        (void) regfree (&*it);
    }
}
// Debug output: object address, previous_eo-so/eo, error flag and the input
// in square brackets, followed by a newline.
ostream& operator<<(ostream &o, const RegexTokenizer &r){
    o<<"("<<&r<<" ";
    o<<r.previous_eo<<"-"<<r.so<<"/"<<r.eo;
    o<<" ?"<<r.error<<")";
    o<<"["<<r.input<<"]"<<endl;
    return o;
}

View File

@ -0,0 +1,234 @@
/*
* $Source$
* $Revision$
* $Date$
*
* Copyright (c) 1999 by CyberSolutions GmbH.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by CyberSolutions GmbH.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "text.hpp"
// begin test section
// Set verbose to 1 if you want to see the parsed tokens
// (every test function checks this flag before writing to cout).
const int verbose=0;
// Tokenizes a fixed string with a custom pattern (a run of lower-case
// letters followed by a run of other characters) and counts the loop
// iterations. Returns 0 if the count is 5, otherwise 1.
int test1(){
    string input("a22bbb4444ccccc999999999dfgDFG");
    // The entire match is returned by *next, the subexpressions by
    // next[1], next[2], next[3], ..
    RegexTokenizer rt(input,"([a-z]*)([^a-z]*)");
    int count=0;
    RegexTokenizer::iterator next= rt.begin();
    const RegexTokenizer::iterator last= rt.end();
    if(verbose)
        cout << "*** begin 1*** "<<endl;
    for(; next!=last; count++){
        ++next;                       // pre-increment processes the input
        if(next!=last && verbose){
            cout << *next             // entire match,
                 <<"="
                 <<next[1]            // first subexpression,
                 <<"+"
                 <<next[2]            // second subexpression
                 << endl;
        }
    }
    if(verbose)
        cout << "--- end 1 ---"<<count<<endl;
    return (count != 5) ? 1 : 0;
}
// Tokenizes a multi-line string with an explicit word pattern (non-blanks
// followed by blanks) and counts the loop iterations.
// Returns 0 if the count is 8, otherwise 1.
int test2(){
    string input("Word-Satz 1\n Satz 1a\nSatz 2\n\tSatz2a.");
    // The entire match is returned by *next, the subexpressions by
    // next[1], next[2], next[3], ..
    RegexTokenizer rt(input,"([^ \t\n]*)([ \t\n]*)");
    int count=0;
    RegexTokenizer::iterator next= rt.begin();
    const RegexTokenizer::iterator last= rt.end();
    if(verbose)
        cout << "*** begin 2*** "<<endl;
    for(; next!=last; count++){
        ++next;                       // pre-increment processes the input
        if(next!=last && verbose){
            cout <<next[1]            // first matched subexpression
                 <<"["
                 <<next[2]            // second matched subexpression
                 <<"]"<< endl;
        }
    }
    if(verbose)
        cout << "--- end 2 ---"<<count<<endl;
    return (count != 8) ? 1 : 0;
}
// Exercises the predefined Word mode, whose pattern is
// "([^ \t\n]*)([ \t\n]*)", and counts the loop iterations.
// Returns 0 if the count is 12, otherwise 1.
int testWord(){
    string input("Ein Satz aus vielen langen Wor-ten.\nUnd ein zweiter Satz.2\n3");
    // The entire match is returned by *next, the subexpressions by
    // next[1], next[2], next[3], ..
    RegexTokenizer rt(input,RegexTokenizer::Word);
    int count=0;
    RegexTokenizer::iterator next= rt.begin();
    const RegexTokenizer::iterator last= rt.end();
    if(verbose)
        cout << "*** begin Word*** "<<endl;
    for(; next!=last; count++){
        ++next;                       // pre-increment processes the input
        if(next!=last && verbose){
            cout <<next[1]            // first matched subexpression,
                 <<"["
                 <<next[2]            // second matched subexpression
                 <<"]"<< endl;
        }
    }
    if(verbose)
        cout << "--- end Word ---"<<count<<endl;
    return (count != 12) ? 1 : 0;
}
// Exercises the predefined Line mode, whose pattern is "^(.*)$\n", and
// counts the loop iterations. Returns 0 if the count is 5, otherwise 1.
int testLine(){
    string input("Line-Satz 1\n Satz 1a\nSatz 2\n\tSatz2a.");
    // The entire match is returned by *next, the subexpressions by
    // next[1], next[2], next[3], ..
    RegexTokenizer rt(input,RegexTokenizer::Line);
    int count=0;
    RegexTokenizer::iterator next= rt.begin();
    const RegexTokenizer::iterator last= rt.end();
    if(verbose)
        cout << "*** begin Line*** "<<endl;
    for(; next!=last; count++){
        ++next;                       // pre-increment processes the input
        if(next!=last && verbose){
            cout <<"'"
                 <<next[1]            // first matched subexpression
                 <<"'"<<"["
                 <<next[2]            // second matched subexpression
                 <<"]"<< endl;
        }
    }
    if(verbose)
        cout << "--- end Line ---"<<count<<endl;
    return (count != 5) ? 1 : 0;
}
// Exercises the predefined RFC mode, whose pattern is
// "((^.*$)((\n)^[ \t]+.*$)*)(\n)?", and counts the loop iterations.
// Known limitation: the whitespace that glues one line to the next is not
// removed (as far as the author knows, a single regular expression cannot
// do that).
// Returns 0 if the count is 3, otherwise 1.
int testRFC(){
    string input("RFC-Satz 1\n Satz 1a\nSatz 2\n\tSatz2a\n\tSatz2b.");
    // The entire match is returned by *next, the subexpressions by
    // next[1], next[2], next[3], ..
    RegexTokenizer rt(input,RegexTokenizer::RFC);
    int count=0;
    RegexTokenizer::iterator next= rt.begin();
    const RegexTokenizer::iterator last= rt.end();
    if(verbose)
        cout << "*** begin RFC*** "<<endl;
    for(; next!=last; count++){
        ++next;                       // pre-increment processes the input
        if(next!=last && verbose){
            // first, second and third matched subexpression
            cout <<"'"<<next[1]<<"'"<<"["<<next[2]<<"]"<<"["<<next[3]<<"]"<< endl;
        }
    }
    if(verbose)
        cout << "--- end RFC ---"<<count<<endl;
    return (count != 3) ? 1 : 0;
}
// Runs the tests in order and stops at the first failure.
// Exit status is 1 if any test failed, 0 if all passed.
int main(int argc, char *argv[])
{
    if(test1())    return 1;
    if(test2())    return 1;
    if(testWord()) return 1;
    if(testLine()) return 1;
    if(testRFC())  return 1;
    return 0;
}

13
libtext/SMakefile Normal file
View File

@ -0,0 +1,13 @@
#
# Skeleton Makefile -- process with smake to create real Makefile.
#
# $Header$
#
# NOTE(review): ".opt -o" presumably tells smake to write its output to
# GNUmakefile.in -- confirm against the smake documentation.
.opt -o GNUmakefile.in
# Library name and the C sources it is built from.
# NOTE(review): presumably consumed by the included library.smk -- confirm.
LIBRARY = text
SRCS = easy_pattern_match.c find_next_line.c find_string.c easy_sprintf.c \
transform_text.c wordwrap.c
.include <library.smk>
.include <autoconf.base.smk>

380
libtext/TokenIterator.cpp Normal file
View File

@ -0,0 +1,380 @@
/*
* $Source$
* $Revision$
* $Date$
*
* Copyright (c) 1999 by CyberSolutions GmbH, Germany.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by CyberSolutions GmbH.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cstdlib>
#include <cstring>
#include <strstream>
#include "text.hpp"
// Returns the index of the first occurrence of s in the NUL-terminated
// string c, or -1 if s does not occur. The terminator itself is never
// compared, so searching for '\0' yields -1.
static int mystrpos(const char *c,char s){
    for(int i=0; c[i]; i++){
        if(c[i]==s)
            return i;
    }
    // The loop only ends at the terminator. The old trailing if/else had
    // two identical branches, one of them unreachable.
    return -1;
}
// Sentinel value: reset() stores it in buffer, and the non-const operator*
// and operator string() read a token first while buffer still equals it.
string TokenIterator::mooncheese= string("The Moon is A green cheese (sheesh!).");
void TokenIterator::reset(){
i= (istream*)0;
brace= 0; bracestack[0]='\0';
braceoftoken= 0;
thesep= '\0'; previoussep= '\0';
eoltoken= '\n';
whitetoken= ' ';
buffer= mooncheese;
}
// Selects the tokenizing mode and loads its character classes.
//   Word:  blank and tab are whitespace; '"' opens and closes a brace
//   Line:  no whitespace, separators, continuation or braces
//   RFC:   like Line, but blank and tab are continuation characters
//   other: blank/tab whitespace, punctuation separators, several brace pairs
// Every mode uses the backslash as escape character.
void TokenIterator::setMode(Mode m){
    mode= m;
    escapechar= '\\';
    if(mode==Word){
        whitespace= " \t";
        separator= "";
        continuation= "";
        leftbrace= "\"";
        rightbrace= "\"";
    }else if(mode==Line){
        whitespace= "";
        separator= "";
        continuation= "";
        leftbrace= "";
        rightbrace= "";
    }else if(mode==RFC){
        whitespace= "";
        separator= "";
        continuation= " \t";
        leftbrace= "";
        rightbrace= "";
    }else{
        whitespace= " \t";
        separator= ",;:+-=/\\@";
        continuation= "";
        leftbrace= "\"([{<";
        rightbrace= "\")]}>";
    }
}
// Default constructor: Word mode, brace handling off, no stream attached
// (reset() leaves the stream pointer null), and nothing to delete later.
TokenIterator::TokenIterator(){
    reset();
    ismyistream= false;
    braces= false;
    setMode(Word);
}
// Default-constructed iterator without a stream; begin() returns it when
// the stream is missing, not good, or at end of file.
TokenIterator TokenIterator::finalIterator = TokenIterator();
// Tokenizes the contents of a string.
//   s: text to tokenize
//   m: tokenizing mode (see setMode)
//   b: whether brace characters are honoured
// The iterator creates its own stream and deletes it in the destructor.
// The first token is not read here.
//
// NOTE(review): the stream is built on s.c_str(), and s is a by-value
// parameter that ends with this constructor. Whether the stream keeps
// reading valid memory afterwards depends on the string implementation --
// confirm, and consider keeping a copy of the text in the object.
TokenIterator::TokenIterator(string s, Mode m, bool b){
    reset();
    ismyistream= true;
    braces= b;
    setMode(m);
    i= new istrstream(s.c_str());
}
// Tokenizes what the given stream delivers.
//   is: stream to read from; it is only referenced and never deleted here
//   m:  tokenizing mode (see setMode)
//   b:  whether brace characters are honoured
// The first token is not read here.
TokenIterator::TokenIterator(istream &is, Mode m, bool b){
    reset();
    ismyistream= false;
    braces= b;
    setMode(m);
    i= &is;
}
// Destructor: deletes the stream only if this object created it.
TokenIterator::~TokenIterator(){
    if(!ismyistream)
        return;
    delete i;
}
// Returns this iterator itself while the stream is usable, otherwise the
// shared finalIterator.
TokenIterator::iterator& TokenIterator::begin() const
{
    if( !i || !i->good() || i->eof() )
        return finalIterator;
    return *const_cast<TokenIterator*> (this);
}
//! from Input Iterator
//! Returns the token currently held in the buffer; never reads from the
//! stream.
TokenIterator::operator string() const
{
    return this->buffer;
}
//! from Input Iterator
//! Returns the current token. If the buffer still holds the mooncheese
//! sentinel, a token is read from the stream first.
TokenIterator::operator string()
{
    const bool nothingReadYet= (buffer== mooncheese);
    if( nothingReadYet )
        (*this)();
    return buffer;
}
//! from Input Iterator
//! Returns the token currently held in the buffer; never reads from the
//! stream.
const string TokenIterator::operator*() const
{
    return this->buffer;
}
//! from Input Iterator
//! Returns the current token. If the buffer still holds the mooncheese
//! sentinel, a token is read from the stream first.
const string TokenIterator::operator*()
{
    const bool nothingReadYet= (buffer== mooncheese);
    if( nothingReadYet )
        (*this)();
    return buffer;
}
//! from Input Iterator
//! Preincrement: reads the next token into the buffer and returns this
//! iterator.
TokenIterator& TokenIterator::operator++()
{
    (*this)();
    return *this;
}
//! from Input Iterator
//! Postincrement.
//! this works .. almost
//!
//! Returns a reference to a function-local static copy of the iterator.
//! As written, the copy is made only once, on the first call ever, from
//! whichever object made that call; later calls return the same stale copy.
//! The parameter i hides the member stream pointer of the same name inside
//! this function.
//! NOTE(review): the postfix form x++ passes 0 for the int argument, so the
//! loop body does not run and no token is consumed. For i>0 the loop only
//! re-enters this operator with 0. This looks unintended -- confirm the
//! desired semantics before relying on postincrement.
//! NOTE(review): the static copy is presumably made with the default copy
//! constructor and would then share the stream pointer and ownership flag
//! with the original -- verify against the class declaration.
TokenIterator& TokenIterator::operator++(int i)
{
static TokenIterator t = *this;
while(i>0){ --i; (*this)++; }
return t;
};
/** compare not equal: true unless R is this very object (identity
    comparison by address; token contents and stream state are ignored) */
bool TokenIterator::operator != (TokenIterator &R) const{
    // note: taking R as const TokenIterator & was observed to create a copy
    // of R, which would defeat the address comparison; a proper comparison
    // would have to look at end flags instead.
    return this != &R;
}
/** compare two Tokenizers: the negation of operator!=, so it is true only
    when R is this very object */
bool TokenIterator::operator == (TokenIterator &R) const{
    // note: see operator!= for why R is not taken as a const reference.
    const bool differs= (*this != R);
    return !differs;
}
/** need this for foreach template: true when there is no stream, the
    stream is not good, or it is at end of file */
bool TokenIterator::operator ! (void) const{
    return !i || !i->good() || i->eof();
}
/** true while a stream is attached, it is good, and it has not reached end
    of file */
bool TokenIterator::hastoken(void) const{
    if(!i)
        return false;
    if(!i->good())
        return false;
    return !i->eof();
}
// Reports whether c starts a line break and, if so, swallows the second
// character of a two-character break.
//   c: character that was just read from *i
//   i: stream the character came from
// Returns false, without touching the stream, when c is neither '\r' nor
// '\n'. Otherwise one more character is read. It is consumed if it is the
// counterpart of c ("\r\n" or "\n\r") and pushed back if it is not. If
// nothing more can be read, the stream is left in the state that failed
// read produced.
inline bool linefeed(char c, istream *i){
    if(c!='\r' && c!='\n')
        return false;
    const char partner= (c=='\r') ? '\n' : '\r';
    char following;
    if( i->get(following) && following!=partner )
        i->unget();
    return true;
}
// Reads the next token from the stream into buffer and returns it.
// Characters are consumed one at a time until a delimiter ends the token or
// the stream has nothing more to deliver. The delimiter that ended the
// token is recorded in thesep, and the previous one in previoussep.
// NOTE(review): the stream pointer i is dereferenced without a null check;
// presumably callers only invoke this on an iterator with a stream attached
// -- confirm.
string TokenIterator::operator()(){
char c= 0;
int pos;
previoussep= thesep;
buffer= string("");
while( i->get(c) ){
// Escape character: the following character is taken literally.
if(c==escapechar){
char d;// special translations need to be plugged in here
if( i->get(d) ){
// Inside a brace an escaped line break is stored as '\n'. Outside a
// brace linefeed() is not even called and d is appended unchanged.
if( brace && linefeed(d,i) )
buffer+= '\n';
else
buffer+= d;
}
}
// Line break (linefeed() also swallows the second char of "\r\n"/"\n\r").
else if( linefeed(c,i) ){
thesep= eoltoken;
{
switch(mode){
case Word:
// Return unless this break directly follows whitespace with nothing
// collected; in that case keep reading.
if( previoussep!=whitetoken || buffer.length() )// space" = "
return buffer;
break;
case Line:
return buffer;
break;
case RFC:
{
// Look at the first character of the next line: if it is not a
// continuation character, the token ends here.
char d;
if( i->get(d) ){
if(!strchr(continuation,d) ){
i->unget();
return buffer;
}else
i->unget();
}
// Continuation line: skip all leading continuation characters and go
// on appending to the same token.
do{
if(!i->get(d)){ return buffer; }
}while( strchr(continuation,d) );
//should "A\n \tB" be returned as one token "AB" or as "A B" ?
// currently, "AB" is returned
i->unget();// unget
}
break;
default:
return buffer;
}
}
}
// Whitespace outside of braces ends a non-empty token; leading whitespace
// is skipped and only remembered in previoussep.
else if( !(brace) && strchr(whitespace,c) ){ // brace>0 implies braces==true
if(buffer.length()){
thesep= whitetoken;
return buffer;// send token
}else
previoussep= whitetoken;// !?
;/* skip */
// Separator character: ends the token, possibly an empty one.
}else if(strchr(separator,c)){
thesep= c;
if( previoussep!=whitetoken || buffer.length() )// space" = "
return buffer;// send token
// The closing brace that matches the innermost open brace.
}else if(brace>0 && bracestack[brace]==c){
/* closing brace */
braceoftoken= brace;
brace--; /* pop stack of braces */
thesep= c;
return buffer;// send token
// Opening brace (only when brace handling is enabled): push the matching
// closing character onto the brace stack.
// NOTE(review): no bound check on bracestack is visible here -- confirm
// its size against the maximum nesting depth.
}else if( braces && (pos=mystrpos(leftbrace,c), pos>=0) ){//pos>0
/* opening brace */
braceoftoken= brace;
bracestack[++brace]= rightbrace[pos];
if( previoussep!=whitetoken || buffer.length() ){// space" = "
thesep= c;
return buffer;// send token
}
}else{
/* normal, append to token */
buffer+= c;
}
}
// Nothing more to read: return whatever has been collected.
return buffer;
}
// Wraps an existing TokenIterator. The pointer is stored, not copied, and
// the state starts at 0.
LexxStyleTokenIterator::LexxStyleTokenIterator(TokenIterator *Tbase){
    base= Tbase;
    state= 0;
}
// Alternates between two kinds of result on successive calls: first the
// next token string read from the wrapped iterator, then the separator
// that ended it, and so on. The state value, cast to a Tokentype, is
// stored as the type of the returned token.
LexxStyleToken& LexxStyleTokenIterator::operator()(){
    state= !state;
    thetoken.ttype= (LexxStyleToken::Tokentype)state;
    if(!state){
        // second half of a pair: the separator of the token just returned
        thetoken.Tchar= base->thesep;
        return thetoken;
    }
    // first half of a pair: read a fresh token
    thetoken.Tstring= (*base)();
    return thetoken;
}

View File

@ -0,0 +1,222 @@
/*
* $Source$
* $Revision$
* $Date$
*
* Copyright (c) 1999 by CyberSolutions GmbH.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by CyberSolutions GmbH.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cstdlib>
#include <fstream.h>
#include <string>
#include "text.hpp"
#include <algo.h>
// Set verbose=1 to see the tokens
// (the test blocks in main() check this flag before printing).
const int verbose=1;
/**
 The do_sth_with_aTokenIterator class
 was written with the intent
 to have the class written to cout
 using the for_each template.

 The call operator prints the token the given iterator currently holds,
 followed by a newline.
*/
class do_sth_with_aTokenIterator {
    string s;
public:
    explicit do_sth_with_aTokenIterator() : s() {}
    //! Writes the current token of the iterator to cout.
    void operator()(const TokenIterator& s)
    { cout<< *s <<endl; }
    //! Postincrement: no state to advance, returns this object.
    //! The function previously had no return statement, so calling it was
    //! undefined behaviour.
    do_sth_with_aTokenIterator& operator++(int i)
    {
        return *this;
    }
};
// Test driver for TokenIterator.
// Reads "test.txt" from the current directory once per scenario and
// tokenizes it in a different mode each time. When `verbose` is non-zero
// every token and the final token count are written to cout.
// Returns 2 if test.txt cannot be opened for the first scenario, 255 if it
// cannot be opened for any later scenario, and 0 otherwise.
// The token-count checks are currently disabled (see the `;//return(1);`
// statements), so a wrong count does not fail the test.
int main(int argc,char *argv[]){
{
//Scenario 1: tokenize words
ifstream i("test.txt");
if(!i)
{ cerr<<"Test Data not found(test.txt)"<< endl; return(2); }
//Initialize the Tokenizer for word mode
TokenIterator tokenize(i,TokenIterator::Word);
string token="";
int count=0;
if(verbose)
cout<<endl<<"--Word"<<endl;
//Loop over all tokens
while( tokenize.hastoken() ){
token= tokenize();
if(verbose)
cout<<":"<<token<<"\n";
count ++;
}
if(verbose)
cout<<endl<<count<<endl;
//Check disabled: the controlled statement is an empty ';',
//so a count other than 27 is silently accepted.
if(count!=27)
;//return(1);
}
{
//Scenario 2: tokenize words, treating a double-quoted phrase as one word
ifstream i("test.txt");
if(!i) return 255;
//Initialize the Tokenizer for word mode, "a b" is one word
//(third constructor argument set to true)
TokenIterator tokenize(i,TokenIterator::Word,true);
string token="";
int count=0;
if(verbose)
cout<<endl<<"--\"Word\""<<endl;
//Loop over all tokens
while( tokenize.hastoken() ){
token= tokenize();
if(verbose)
cout<<":"<<token<<"\n";
count ++;
}
if(verbose)
cout<<endl<<count<<endl;
//Check disabled, see scenario 1.
if(count!=25)
;//return(1);
}
{
//Scenario 3: tokenize lines
ifstream i("test.txt");
if(!i) return 255;
//Initialize the Tokenizer for line mode ( one line == one token )
TokenIterator tokenize(i,TokenIterator::Line);
string token="";
int count=0;
if(verbose)
cout<<endl<<"--Line"<<endl;
//Loop over all tokens
while( tokenize.hastoken() ){
token= tokenize();
if(verbose)
cout<<":"<<token<<"\n";
count ++;
}
if(verbose)
cout<<endl<<count<<endl;
//Check disabled, see scenario 1.
if(count!=10)
;//return(1);
}
{
//Scenario 4: tokenize 'RFC-style'
ifstream i("test.txt");
if(!i) return 255;
//Initialize Tokenizer for RFC mode
// ( If the following line starts with space or tabulator,
// it is glued to the previous line ).
TokenIterator tokenize(i,TokenIterator::RFC);
string token="";
int count=0;
if(verbose)
cout<<endl<<"--RFC"<<endl;
//Loop over all tokens
while( tokenize.hastoken() ){
token= tokenize();
if(verbose)
cout<<":"<<token<<"\n";
count ++;
}
if(verbose)
cout<<endl<<count<<endl;
//Check disabled, see scenario 1.
if(count!=5)
;//return(1);
}
//Scenario 5: trying sequence capability ..
{
//Tokenize words
ifstream i("test.txt");
if(!i) return 255;
TokenIterator tokenize(i,TokenIterator::Word);
string token="";
int count=0;
if(verbose)
cout<<endl<<"--Word(seq)"<<endl;
//TokenIterator has only dummy capabilities, hence the warnings
// or maybe I just don't get what a unary function is ?
//The for_each call below is commented out, so nothing is tokenized
//in this scenario and count stays 0.
/* for_each (
tokenize.begin(),
tokenize.end(),
do_sth_with_aTokenIterator()
)
++count;
*/
if(verbose)
cout<<endl<<count<<endl;
//Only reachable with a non-zero count once the for_each above is re-enabled.
if(count!=0)
cerr<<"You mean someone repaired that ?!"<<endl;
}
// There are different ways to use the TokenIterator;
// this way seems intuitive to me.
// Scenario 6: testing this way ..
{
ifstream i("test.txt");
if(!i) return 255;
TokenIterator lines(i, TokenIterator::Line);
cout<<endl;
cout<<"testing while( line= ++lines, ( lines.begin() != lines.end() ) )"
<<"\n cout<<\"<<line<<\"<<endl; "<<endl;
string line;
//Comma operator: first advance the iterator and assign it to `line`
//(via TokenIterator's conversion to string), then use the
//begin()/end() comparison alone as the loop condition.
while( line= ++lines, ( lines.begin() != lines.end() ) )
cout<<"\""<<line<<"\""<<endl;
}
return 0;
}

28
libtext/configure.in Normal file
View File

@ -0,0 +1,28 @@
dnl
dnl configure.in -- Process this file with autoconf to produce a configure script.
dnl
dnl $Header$
dnl
AC_INIT(text.h)
dnl Checks for paths and programs.
dnl
AC_PROG_CC
AC_PROG_RANLIB
dnl Enable warning flags for gcc.
dnl NOTE(review): CXXFLAGS is adjusted here although only AC_PROG_CC is
dnl run -- confirm whether AC_PROG_CXX is provided elsewhere.
dnl
if test "$GCC" = yes; then
CFLAGS="$CFLAGS -Wall -pedantic"
CXXFLAGS="$CXXFLAGS -Wall -pedantic"
fi
dnl Remove '-g' and '-O2' from the compile flags.
dnl The flag list is padded with blanks so that only whole, blank-delimited
dnl words are matched; a plain substring match would also damage options
dnl that merely contain those letters (-ggdb, -fno-gcse, ...).
dnl The last two expressions strip the padding again.
dnl
CFLAGS=`echo " $CFLAGS " | sed -e "s/ -g / /" -e "s/ -O2 / /" -e "s/^ *//" -e "s/ *$//"`
CXXFLAGS=`echo " $CXXFLAGS " | sed -e "s/ -g / /" -e "s/ -O2 / /" -e "s/^ *//" -e "s/ *$//"`
dnl Write results.
dnl
AC_OUTPUT(GNUmakefile)

View File

@ -1,10 +1,39 @@
/*
* $Source$
* $Revision$
* $Date$
* $Source$
* $Revision$
* $Date$
*
* Copyright (C) 1996 by CyberSolutions GmbH.
* All rights reserved.
* Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Peter Simons.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/types.h>

View File

@ -1,10 +1,39 @@
/*
* $Source$
* $Revision$
* $Date$
* $Source$
* $Revision$
* $Date$
*
* Copyright (C) 1996,97 by CyberSolutions GmbH.
* All rights reserved.
* Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Peter Simons.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdarg.h>

View File

@ -1,10 +1,39 @@
/*
* $Source$
* $Revision$
* $Date$
* $Source$
* $Revision$
* $Date$
*
* Copyright (C) 1996,97 by CyberSolutions GmbH.
* All rights reserved.
* Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Peter Simons.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "text.h"

View File

@ -1,10 +1,39 @@
/*
* $Source$
* $Revision$
* $Date$
* $Source$
* $Revision$
* $Date$
*
* Copyright (C) 1996 by CyberSolutions GmbH.
* All rights reserved.
* Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Peter Simons.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>

14
libtext/test.txt Normal file
View File

@ -0,0 +1,14 @@
Ein Wort ist kein Satz,
aber ein Satz ist kein Ersatz für ein Wort.
"Das Wort" ist der "Ein Satz" im "Vor Wort".
Dagegen kommt das "Vor Spiel" vor dem Spiel,
obwohl das eigentliche "Kommen" im Spiel stattfindet.
So gesehen, ist das "Vor Spiel" ein richtiges Spiel,
während das sogenannte Spiel der "Volle Ernst" ist.
Andererseits, wenn Ernst voll ist, findet meist weder "Vor Spiel" noch ein Spiel statt.
Dieses Spiel hatte dann schon vorher stattgefunden;
Meistens war es dann ein Lokaltermin des "Fußball Clubs Bayern".
Darauf folgte ein Termin im Lokal; Nach dem Fußballspiel zog Ernst das Lokal
der "Oben ohne Bar" vor;
Denn in der Bar ging nichts ohne, weder ohne das eine, noch ohne unten.
Des weiteren hatte ein Besuch in der Bar für Ernst meist ein "Nach Spiel" zu Hause.

View File

@ -1,10 +1,39 @@
/*
* $Source$
* $Revision$
* $Date$
* $Source$
* $Revision$
* $Date$
*
* Copyright (C) 1996,97 by CyberSolutions GmbH.
* All rights reserved.
* Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Peter Simons.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LIB_TEXT_H__
@ -19,17 +48,19 @@
/********** Useful defines and declarations **********/
#ifndef __cplusplus
#ifndef __HAVE_DEFINED_BOOL__
# define __HAVE_DEFINED_BOOL__ 1
typedef int bool;
#endif
#ifndef FALSE
# define FALSE (0==1)
#endif
#ifndef TRUE
# define TRUE (1==1)
#endif
#endif
enum {
TEXT_REGEX_OK = 0,
TEXT_REGEX_ERROR,

539
libtext/text.hpp Normal file
View File

@ -0,0 +1,539 @@
/*
* $Source$
* $Revision$
* $Date$
*
* Copyright (c) 1999 by CyberSolutions GmbH.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by CyberSolutions GmbH.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LIB_TEXT_HPP__
#define __LIB_TEXT_HPP__
#include <stdexcept>
#include <string>
#include <cstring>
#include <iterator>
#include <list>
#include <sys/types.h>
#include "../RegExp/RegExp.hpp"
/** \file text.hpp
A library for text parsing and manipulation.
This library contains a couple of useful functions for dealing
with strings, most notably a regular expression class and a
generic config file parser.
*/
//////////////////////////////////////////////////
// tokenize() //
//////////////////////////////////////////////////
/** Split \a buffer into tokens and hand each one to the insert iterator.
    A token is a maximal run of characters none of which occurs in \a sep;
    runs of separators are skipped, so no empty tokens are produced.
    \param ii      insert iterator receiving one string per token
    \param buffer  the text to split
    \param sep     the set of separator characters
*/
template<class T>
void tokenize(insert_iterator<T> & ii, const string & buffer,
              const char * sep = " \t\r\n")
{
    // Position of the first character of the next token, if any.
    string::size_type start = buffer.find_first_not_of(sep);
    while (start != string::npos) {
        // One past the last character of the token (npos: runs to the end).
        const string::size_type stop = buffer.find_first_of(sep, start);
        *ii = buffer.substr(start, stop - start);
        ++ii;
        // Skip the separator run that ended this token.
        start = (stop == string::npos)
              ? string::npos
              : buffer.find_first_not_of(sep, stop);
    }
}
//////////////////////////////////////////////////
// RegexTokenizer() //
//////////////////////////////////////////////////
/** The RegexTokenizer extracts tokens from 'string' input.
string or stream input has to be converted to string. This means
the Tokenizer should be useful with large input which is divided
into large chunks. A match is performed against a list of regular
expressions. Each expression defines a match-separator pair.
Regular Expressions are compiled with REG_EXTENDED flag.
*/
class RegexTokenizer: forward_iterator<RegexTokenizer, int> {
public:
    /** maximum number of registers, subexpressions */
    static const int N_pm=10;
    /** maximum length of a match */
    static const int N_substring=1024;
    /** the workspace */
    static char workspace[N_substring+1]; //+1 for trailing \0
    /** Modes (other than Custom) make the \a RegexTokenizer use a standard regular expression.
        \a Custom : The tokenizer uses the regular expression you specify.
        \a Word : The tokenizer gives chunks of input separated by space and tabs.
        \a Line : The tokenizer splits input at end of line.
        \a RFC : The tokenizer splits input at end of line.
        Lines may be continued by starting a new line with spaces or tabs.
        These continuation characters are NOT stripped from the tokens.
    */
    enum Mode {Custom, Word, Line, RFC};
    /** RegexTokenizer is its own iterator. */
    typedef RegexTokenizer iterator;
private:
    string input;
    string result[N_pm];        // result[0] is the current token, result[i] the i-th subexpression
    list<const char*>regex_src; // the source regexes needed for copy/begin/end
    list<regex_t>regex;         // not sure multiple regexes are a smart idea
    int whichregexwasmatched;
    regmatch_t pm[N_pm];
    int I_pm;                   // matched subexpressions
    int error;                  // result of regex calls
    int so,eo,previous_eo;      // positions; these three define iterator equality
    //int matchMask;//bitset; which fields to return by the * operator
protected:
    Mode mode;
    void advance();
    void reset();
    int set(string _input,list<const char*> _regex);
public:
    /** default constructor. */
    RegexTokenizer();
    /** Tokenize a string in a mode. */
    RegexTokenizer(string _input,Mode _mode);
    /** Tokenize a string according to a single regular expression. */
    RegexTokenizer(string _input,const char* oneregex);
    /** Tokenize a string according to several regular expressions.
        (If the first regular expression fails, the next one will be tried. )
    */
    RegexTokenizer(string _input,list<const char*> _regex);
    /** copy constructor */
    RegexTokenizer(const RegexTokenizer &r);
    //void selectFields(int m){ matchMask= m; }
    /** The begin state */
    RegexTokenizer begin() const;
    /** The end state */
    RegexTokenizer end() const;
    /**
        from Input Iterator
        Returns the current token.
    */
    const string operator*() const
    { return result[0]; }
    /** from Input Iterator
        Returns the i-th matched subexpression.
        An index outside [0, N_pm) yields an empty string instead of
        reading past the end of the result array.
    */
    const string operator[](int i) const
    { return (i >= 0 && i < N_pm) ? result[i] : string(); }
    /** from Input Iterator
        PreIncrement: calls advance() once.
    */
    RegexTokenizer& operator++()
    { advance(); return *this; }
    /** from Input Iterator
        PostIncrement.
        The expression `tok++` is compiled as tok.operator++(0), so a
        count of 0 has to mean "one step"; otherwise postfix increment
        would never advance. An explicit tok.operator++(n) with n > 0
        still advances n times, as before.
        Note that, unlike a conventional postfix operator, this returns
        the iterator itself (after advancing), not a copy of the old state.
    */
    RegexTokenizer& operator++(int i)
    {
        if (i == 0)
            i = 1;
        while (i > 0) { advance(); --i; }
        return *this;
    }
    /** Destructor */
    virtual ~RegexTokenizer();
    /** compare not equal: true if any of the match positions differ */
    bool operator != (const RegexTokenizer &R) const{// const & I say, const
        return so != R.so || eo != R.eo || previous_eo != R.previous_eo;
    }
    /** compare two RegexTokenizers */
    bool operator == (const RegexTokenizer &R) const{
        return !( *this != R );
    }
    /** print the current state of the RegexTokenizer */
    friend ostream& operator<<(ostream &o,const RegexTokenizer &r);
};
//////////////////////////////////////////////////
// TokenIterator //
//////////////////////////////////////////////////
/** The TokenIterator extracts tokens from string or stream input.
There are four main modes and a custom mode. In all modes, the
backslash works as an escape character for the next character i.e.
'one\\\\backslash' is read as 'one\backslash'.
Description of the main modes:
1. Words separated by whitespace, with "whitespace" consisting of
tabulators and the blank.
\code
TokenIterator tokenize(inputStr,TokenIterator::Word);
\endcode
2. Words separated by whitespace, "one word" is one token.
whitespace is defined to be only tabulators and the blank.
\code
TokenIterator tokenize(inputStr,TokenIterator::Word,true);
\endcode
3. Each line is a token.
Escaped newlines will become part of the token.
example:
\code
TokenIterator tokenize(inputStr,TokenIterator::Line);
\endcode
4. RFC style:
Whitespace at start of next line appends next line.
The use of escaping the newline to append the next line,
like in Makefiles, is NOT part of this mode.
example:
\code
TokenIterator tokenize(inputStr,TokenIterator::RFC);
\endcode
5. The Custom Mode: The custom mode is intended for reading from
data that is in almost human-readable-format, like /etc/passwd.
Separating elements are not returned as Tokens, but are stored in
thesep and previoussep. In /etc/passwd ':' is the separator,
while newlines separate records.
\code
class MyCustomTokenIterator: public TokenIterator{
public:
MyCustomTokenIterator(string inputStr, bool b=false)
: TokenIterator(inputStr,TokenIterator::Custom, b){
eoltoken= '\n';
separator= ":\n";
};
MyCustomTokenIterator(istream &inputStr, bool b=false)
: TokenIterator(inputStr,TokenIterator::Custom, b){
eoltoken= '\n';
separator= ":\n";
};
\endcode
See \a CustomTokenIterator.cpp for the full example.
Bugs (Custom Mode): A separator that is preceded by whitespace is not recognized.
Instead, the tokenizer collapses the run of whitespace and
reports that whitespace as the separator in thesep.
This is probably not what you want.
*/
class TokenIterator:istream_iterator<string,int> {
private:
// Stream the tokens are read from.
istream *i;
// presumably true when this object created (and must release) *i -- TODO confirm in the implementation
bool ismyistream;
string buffer;
// The shared "end" iterator returned by end().
static TokenIterator finalIterator;
// NOTE(review): purpose not visible in this header -- see the implementation file
static string mooncheese;
public:
/** \relates TokenIterator
The modes allowed as arguments.
*/
enum Mode {Word, Line, RFC, Custom};
/** TokenIterator is its own iterator type. */
typedef TokenIterator iterator;
protected:
int brace;
// Value reported by bracingdepth().
int braceoftoken;
string bracestack;
bool braces;
Mode mode;
const char *whitespace; // ALL whitespace must be listed here
const char *separator; // separators
const char *continuation;// lists continuation
const char *leftbrace; // leftbrace[i] matches rightbrace[i]
const char *rightbrace; // supports multiple levels of braces
char escapechar;// escapechar is the escape char; default \ .
char eoltoken; // use this instead of end of line
char whitetoken;// use this instead of whitespace
void setMode(Mode m);
void reset();
public:
/**
Returns one token each call.
An empty token does NOT signal the end of the input.
*/
virtual string operator()();
/** Dummy constructor:
constructs an Iterator that has reached end */
TokenIterator();
/** Constructor used to tokenize a string s,
using \a Mode m (default is Words),
by default without braces.
*/
TokenIterator(string s, Mode m=Word, bool braces=false);
/** Constructor used to tokenize from an input stream,
using \a Mode m (default is Words),
by default without braces.
The input stream is consumed, which is why
the TokenIterator doesn't offer backward iterator capabilities.
*/
TokenIterator(istream &is, Mode m=Word, bool braces=false);
/** A begin function returning an iterator.
\a begin and \a end functions have been crafted to
work with this way of using iterators:
\code
ifstream is(somefilename);
TokenIterator tokenize(is);
while( tokenize.begin() != tokenize.end() ){
string token= tokenize();
...
}
\endcode
*/
iterator& begin() const;
/** An end function returning an iterator: always the shared
static end iterator. See \a begin .
*/
inline iterator& end() const{ return finalIterator; };
virtual ~TokenIterator();
//! from Input Iterator
//! Returns the current object in the stream.
operator string() const;
//! from Input Iterator
//! Returns the current object in the stream,
//! and the next object if the stream hasn't been read yet
operator string();
//! from Input Iterator
//! Returns the current object in the stream.
const string operator*() const;
//! from Input Iterator
//! Returns the current object in the stream,
//! and the next object if the stream hasn't been read yet
const string operator*();
//! from Input Iterator
//! Preincrement.
TokenIterator& operator++();
//! from Input Iterator
//! Postincrement.
//! this works .. almost
TokenIterator& operator++(int i);
/** compare not equal */
bool operator != (TokenIterator &R) const;
/** compare two Tokenizers */
bool operator == (TokenIterator &R) const;
/** need this for foreach template */
bool operator ! (void) const;
/** Explicit replacement for a conversion to bool.
Introducing an implicit conversion to bool is not
good because it creates an ambiguity,
since bool may be converted implicitly to int and String. */
bool hastoken (void) const;
/** contains the separator that ended the token */
char thesep;
/** holds the separator that preceded the token */
char previoussep;
/** when using braces (in custom mode),
check this to get the number of unclosed braces. */
inline int bracingdepth() const{ return braceoftoken; };
/** use this to compare with instead of end of line \\n */
inline char eolToken() const{ return eoltoken; };
/** use this to compare with instead of space */
inline char whiteToken() const{ return whitetoken; };
};
/** \example TokenIterator_test.cpp */
/** \example CustomTokenIterator.cpp */
/**
The LexxStyleToken is returned by the \a LexxStyleTokenIterator
\code
struct LexxStyleToken{
enum Tokentype {T1_separator, T1_string};
Tokentype ttype;
string Tstring;
char Tchar;
};
\endcode
*/
struct LexxStyleToken{
// The two kinds of token: a separator, or a string.
enum Tokentype {T1_separator, T1_string};
// Which kind of token this is.
Tokentype ttype;
// presumably the token text when ttype is T1_string -- TODO confirm in LexxStyleTokenIterator
string Tstring;
// presumably the separator character when ttype is T1_separator -- TODO confirm
char Tchar;
};
/**
The \a LexxStyleToken iterator is a wrapper around the
\a TokenIterator . It returns the separators and the parts
of the string that are separated by the separators
in alteration.
*/
class LexxStyleTokenIterator{
private:
// The wrapped TokenIterator, as passed to the constructor.
// NOTE(review): held as a raw pointer; ownership is not visible here -- confirm the caller keeps it alive.
TokenIterator *base;
// presumably tracks whether a separator or a string is due next -- TODO confirm in the implementation
int state;
public:
/**
The current token;
reading it does not proceed to the next token.
*/
LexxStyleToken thetoken;
/**
Wrap the TokenIterator in the LexxStyleTokenIterator.
*/
LexxStyleTokenIterator(TokenIterator *Tbase);
/**
Return the next token.
*/
LexxStyleToken& operator()();
};
/**
   \a crop_token removes leading and trailing whitespace from a token.
   \param s          the token to crop
   \param whitespace the set of characters to strip; by default the
                     blank and the tabulator
   \return the part of \a s from its first to its last non-whitespace
           character; an empty string if \a s is empty or consists of
           whitespace only
   Example:
   \code
   cout << crop_token( " \thead tail \t" ) << endl; // prints "head tail"
   \endcode
*/
inline string crop_token(const string &s, const string whitespace=string(" \t") ){
    const string::size_type left = s.find_first_not_of(whitespace);
    // Nothing but whitespace (or nothing at all): without this guard the
    // string(s, npos, ...) construction below would throw out_of_range.
    if (left == string::npos)
        return string();
    // A non-whitespace character exists, so this search cannot fail.
    const string::size_type right = s.find_last_not_of(whitespace);
    return string(s, left, right-left+1);
}
/** \a text_escape returns a copy of \a lines in which every newline
    and every backslash is preceded by a backslash, so that the text
    may be read back by the \a TokenIterator in
    \a TokenIterator::Line or \a TokenIterator::Word Mode.
*/
inline string text_escape(const string &lines)
{
    // First pass: find out how much room the added backslashes need.
    string::size_type extra = 0;
    for (string::const_iterator p = lines.begin(); p != lines.end(); ++p) {
        const char c = *p;
        if (c == '\\' || c == '\n')
            ++extra;
    }

    string escaped;
    escaped.reserve(lines.size() + extra + 1);

    // Second pass: copy, inserting the escape character where required.
    for (string::const_iterator p = lines.begin(); p != lines.end(); ++p) {
        const char c = *p;
        if (c == '\\' || c == '\n')
            escaped += '\\';
        escaped += c;
    }
    return escaped;
}
#endif // !defined(__LIB_TEXT_HPP__)

View File

@ -0,0 +1,60 @@
/*
* $Source$
* $Revision$
* $Date$
*
* Copyright (c) 1999 by CyberSolutions GmbH.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by CyberSolutions GmbH.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <iostream>
#include <list>
#include "text.hpp"
int
main(int argc, char ** argv)
{
//
// Test the tokenizer.
//
list<string> l;
insert_iterator< list<string> > ii(l, l.end());
const string buf("this is a test\n\n\n\r\tskfj \t blax\n");
tokenize(ii, buf);
cout << "Found " << l.size() << " tokens." << endl;
if( l.size()!=6 )
return 1;
copy(l.begin(), l.end(), ostream_iterator<string>(cout, "\n"));
return 0;
}

View File

@ -1,10 +1,39 @@
/*
* $Source$
* $Revision$
* $Date$
* $Source$
* $Revision$
* $Date$
*
* Copyright (C) 1996 by CyberSolutions GmbH.
* All rights reserved.
* Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Peter Simons.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/types.h>

View File

@ -1,10 +1,39 @@
/*
* $Source$
* $Revision$
* $Date$
* $Source$
* $Revision$
* $Date$
*
* Copyright (C) 1996,97 by CyberSolutions GmbH.
* All rights reserved.
* Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Peter Simons.
*
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <ctype.h>