From 11bb6bc1cabf635b635dc0e637f81ca90d0748cf Mon Sep 17 00:00:00 2001 From: Peter Simons Date: Wed, 13 Dec 2000 15:45:25 +0000 Subject: [PATCH] Imported latest version. --- liblists/Makefile | 69 ---- liblists/SMakefile | 15 + liblists/configure.in | 33 ++ liblists/lists.c | 43 ++- liblists/{lists.h => lists.h.in} | 44 ++- libtext/CustomTokenIterator.cpp | 59 ++++ libtext/Makefile | 87 ----- libtext/RegexTokenizer.cpp | 259 +++++++++++++++ libtext/RegexTokenizer_test.cpp | 234 ++++++++++++++ libtext/SMakefile | 13 + libtext/TokenIterator.cpp | 380 ++++++++++++++++++++++ libtext/TokenIterator_test.cpp | 222 +++++++++++++ libtext/configure.in | 28 ++ libtext/easy_pattern_match.c | 39 ++- libtext/easy_sprintf.c | 39 ++- libtext/find_next_line.c | 39 ++- libtext/find_string.c | 39 ++- libtext/test.txt | 14 + libtext/text.h | 43 ++- libtext/text.hpp | 539 +++++++++++++++++++++++++++++++ libtext/tokenizer_test.cpp | 60 ++++ libtext/transform_text.c | 39 ++- libtext/wordwrap.c | 39 ++- 23 files changed, 2172 insertions(+), 204 deletions(-) delete mode 100644 liblists/Makefile create mode 100644 liblists/SMakefile create mode 100644 liblists/configure.in rename liblists/{lists.h => lists.h.in} (51%) create mode 100644 libtext/CustomTokenIterator.cpp delete mode 100644 libtext/Makefile create mode 100644 libtext/RegexTokenizer.cpp create mode 100644 libtext/RegexTokenizer_test.cpp create mode 100644 libtext/SMakefile create mode 100644 libtext/TokenIterator.cpp create mode 100644 libtext/TokenIterator_test.cpp create mode 100644 libtext/configure.in create mode 100644 libtext/test.txt create mode 100644 libtext/text.hpp create mode 100644 libtext/tokenizer_test.cpp diff --git a/liblists/Makefile b/liblists/Makefile deleted file mode 100644 index 77b2fce..0000000 --- a/liblists/Makefile +++ /dev/null @@ -1,69 +0,0 @@ -# -# liblists Makefile -# -# $Header$ -# - -# Make Rules: -# =========== -# -.c.o: - $(CC) $(CFLAGS) $(CPPFLAGS) -c $< - - -# Compiler Flags: -# 
=============== -# -CFLAGS = -Wall -CPPFLAGS= - - -# -# Labels: -# ======= -# -SRCS = lists.c -OBJS = $(SRCS:.c=.o) -MANFILES= $(SRCS:.c=.3) - - -# -# Targets -# -.PHONY: all man clean realclean distclean depend - -all: liblists.a - -man: InitList.3 - -clean: - rm -f liblists.a *.o *.3 *.core - -realclean: clean - rm -rf man3 - -distclean: realclean - -depend: - makedepend -Y /usr/include $(SRCS) - @rm -f Makefile.bak - -InitList.3: lists.c - c2man -ilists.h -g lists.c - - -# -# Actions -# -liblists.a: $(OBJS) - rm -f $@ - $(AR) cr $@ $(OBJS) - $(RANLIB) $@ - - -# -# Dependencies -# -# DO NOT DELETE - -lists.o: lists.h diff --git a/liblists/SMakefile b/liblists/SMakefile new file mode 100644 index 0000000..f39dde4 --- /dev/null +++ b/liblists/SMakefile @@ -0,0 +1,15 @@ +# +# Skeleton Makefile -- process with smake to create real Makefile. +# +# $Header$ +# +.opt -o GNUmakefile.in + +LIBRARY = lists +SRCS = lists.c + +.include +.include + +distclean-local:: + rm -f lists.h diff --git a/liblists/configure.in b/liblists/configure.in new file mode 100644 index 0000000..e32c251 --- /dev/null +++ b/liblists/configure.in @@ -0,0 +1,33 @@ +dnl +dnl configure.in -- Process this file with autoconf to produce a configure script. +dnl +dnl $Header$ +dnl + +AC_INIT(lists.h.in) + +dnl Checks for paths and programs. +dnl +AC_PROG_CC +AC_PROG_RANLIB + +dnl Check for optional header files. +dnl +AC_CHECK_HEADERS(strings.h) + +dnl Enable warning flags for gcc. +dnl +if test "$GCC" = yes; then + CFLAGS="$CFLAGS -Wall -pedantic" + CXXFLAGS="$CXXFLAGS -Wall -pedantic" +fi + +dnl Remove '-g' and '-O2' from the compile flags. +dnl +CFLAGS=`echo $CFLAGS | sed -e "s/-g//" -e "s/-O2//"` +CXXFLAGS=`echo $CXXFLAGS | sed -e "s/-g//" -e "s/-O2//"` + +dnl Write results. 
+dnl +AC_CONFIG_HEADER(lists.h) +AC_OUTPUT(GNUmakefile) diff --git a/liblists/lists.c b/liblists/lists.c index 1b981d9..e823b67 100644 --- a/liblists/lists.c +++ b/liblists/lists.c @@ -1,10 +1,39 @@ /* - * $Source$ - * $Revision$ - * $Date$ + * $Source$ + * $Revision$ + * $Date$ * - * Copyright (C) 1996,97 by CyberSolutions GmbH. - * All rights reserved. + * Copyright (c) 1996-99 by Peter Simons + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Peter Simons. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "lists.h" @@ -114,7 +143,7 @@ isEndOfList(const Node node) assert(node != NULL); if (node == NULL) - return NULL; + return 1; return (node->ln_Next == NULL); } @@ -418,7 +447,7 @@ CountElements(List head) assert(head != NULL); if (head == NULL) - return NULL; + return 0; for (i = 0, node = getFirstNode(head); node != NULL; node = getNextNode(node)) i++; diff --git a/liblists/lists.h b/liblists/lists.h.in similarity index 51% rename from liblists/lists.h rename to liblists/lists.h.in index 83aaf0d..5a731ec 100644 --- a/liblists/lists.h +++ b/liblists/lists.h.in @@ -1,15 +1,47 @@ /* - * $Source$ - * $Revision$ - * $Date$ + * $Source$ + * $Revision$ + * $Date$ * - * Copyright (C) 1996,97 by CyberSolutions GmbH. - * All rights reserved. + * Copyright (c) 1996-99 by Peter Simons + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Peter Simons. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __LIB_LISTS_H__ #define __LIB_LISTS_H__ 1 +/* For autoconf... 
*/ +#undef HAVE_STRINGS_H + #include #include #ifdef HAVE_STRINGS_H /* sunos 4 needs this */ @@ -22,6 +54,7 @@ /********** Useful defines and declarations **********/ +#ifndef __cplusplus #ifndef __HAVE_DEFINED_BOOL__ # define __HAVE_DEFINED_BOOL__ 1 typedef int bool; @@ -32,6 +65,7 @@ typedef int bool; #ifndef TRUE # define TRUE (1==1) #endif +#endif /********** Structures **********/ diff --git a/libtext/CustomTokenIterator.cpp b/libtext/CustomTokenIterator.cpp new file mode 100644 index 0000000..dff219f --- /dev/null +++ b/libtext/CustomTokenIterator.cpp @@ -0,0 +1,59 @@ +#include "text.hpp" +#include + +/** + Example using the Custom Mode of the TokenIterator + */ +class MyCustomTokenIterator: public TokenIterator{ +public: + + MyCustomTokenIterator(string inputStr, bool b=false) + : TokenIterator(inputStr,TokenIterator::Custom, b){ + eoltoken= '\n'; + separator= ":\n"; + whitespace= ""; + }; + + MyCustomTokenIterator(istream &inputStr, bool b=false) + : TokenIterator(inputStr,TokenIterator::Custom, b){ + eoltoken= '\n'; + separator= ":\n"; + whitespace= ""; + }; +}; + + +int main(int argc, char* argv[]){ + ifstream infile("/etc/passwd"); + MyCustomTokenIterator tokenize(infile); + while(!infile.eof()){ + string user= tokenize(); + string password= tokenize(); + string userid = tokenize(); + string groupid= tokenize(); + string description= tokenize(); + string home = tokenize(); + string shell= tokenize(); + + if(password=="x") + password="shadowed"; + + cout << "----"<0){ pm[i].rm_so=-1; pm[i].rm_eo=-1; } + so= 0; + eo= 0; + previous_eo= -1; + error= 0; +} + +int RegexTokenizer::set(string _input,list _regex){ + reset(); + input= _input; + + list::iterator first= _regex.begin(); + list::iterator last = _regex.end(); + + while(first!=last){ + + regex_t re; + int i; + + //REG_EXTENDED + //use extended regular expressions + //REG_NEWLINE + //makes ^...$ work to match newline/endofline + + i= regcomp (&re, *first, REG_EXTENDED|REG_NEWLINE); + if(i) + return i; + 
regex.push_back(re); + regex_src.push_back(*first); + ++first; + } +} + +RegexTokenizer::RegexTokenizer(string _input,Mode _mode){ + mode= _mode; + //create a list + listalist; + switch(_mode){ + case Word: + alist.push_back("([^ \t\n]*)([ \t\n]*)"); + break; + case Line: + alist.push_back("^(.*)$\n"); + break; + case RFC: + alist.push_back("((^.*$)((\n)^[ \t]+.*$)*)(\n)?"); + //this works, but output is confusing + // that is, how to remove the glue ? + break; + case Custom: + //break; + default: + cerr<<"RegexTokenizer mode constructor called with pointless mode."<alist; + alist.push_back(oneregex); + set(_input,alist); +} + +RegexTokenizer::RegexTokenizer(string _input,list _regex){ + set(_input,_regex); +} + +RegexTokenizer::RegexTokenizer(const RegexTokenizer &r){ + //cerr<<"(copy constructor)"<::iterator first= regex.begin(); + list::iterator last = regex.end(); + error= 1; + + previous_eo= eo; + while(error && result[0].empty() && first!=last){//check for empty buffer + { + //cerr<0){ result[i]= string(); } + i=0; + while(i=0 && pm[i].rm_eo>0 && + pm[i].rm_so::iterator first= regex.begin(); + list::iterator last = regex.end(); + + while(first!=last){ + //cerr<<"freeing "<<&*first< +.include diff --git a/libtext/TokenIterator.cpp b/libtext/TokenIterator.cpp new file mode 100644 index 0000000..4b84984 --- /dev/null +++ b/libtext/TokenIterator.cpp @@ -0,0 +1,380 @@ +/* + * $Source$ + * $Revision$ + * $Date$ + * + * Copyright (c) 1999 by CyberSolutions GmbH, Germany. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by CyberSolutions GmbH. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include "text.hpp" + +static int mystrpos(const char *c,char s){ + int i=0; + while(c[i]) + { + if(c[i]==s){ + return i; + } + i++; + }; + if(!c[i]) + return -1; + else + return -1; +} + +string TokenIterator::mooncheese= string("The Moon is A green cheese (sheesh!)."); + +void TokenIterator::reset(){ + i= (istream*)0; + brace= 0; bracestack[0]='\0'; + braceoftoken= 0; + thesep= '\0'; previoussep= '\0'; + eoltoken= '\n'; + whitetoken= ' '; + buffer= mooncheese; +} + +void TokenIterator::setMode(Mode m){ + mode= m; + switch(mode){ + case Word: + whitespace=" \t"; + separator=""; + continuation=""; + leftbrace="\""; + rightbrace="\""; + escapechar = '\\'; + break; + case Line: + whitespace=""; + separator=""; + continuation=""; + leftbrace=""; + rightbrace=""; + escapechar = '\\'; + break; + case RFC: + whitespace=""; + separator=""; + continuation=" \t"; + leftbrace=""; + rightbrace=""; + escapechar ='\\'; + break; + default: + whitespace = " \t"; + separator = ",;:+-=/\\@"; + continuation=""; + leftbrace = "\"([{<"; + rightbrace = "\")]}>"; + escapechar = '\\'; + } +} + +TokenIterator::TokenIterator(){ + reset(); + braces= false; + setMode(Word); + ismyistream= false; +} + + + +TokenIterator TokenIterator::finalIterator = TokenIterator(); + +//TokenIterator::TokenIterator(string s, Mode m=Word, bool b=false){ +TokenIterator::TokenIterator(string s, Mode m, bool b){ + reset(); + braces= b; + setMode(m); + ismyistream= true; + i= new istrstream(s.c_str()); + //++(*this);// read first value (not done; makes this unwieldly) +} + +//TokenIterator::TokenIterator(istream &is, Mode m=Word, bool b=false){ +TokenIterator::TokenIterator(istream &is, Mode m, bool b){ + reset(); + braces= b; + setMode(m); + ismyistream= false; + i= &is; + //++(*this);// read first value (not done; makes this unwieldly) +} + +TokenIterator::~TokenIterator(){ + if(ismyistream) + delete i; +} + + +TokenIterator::iterator& TokenIterator::begin() const +{ + 
if( i && i->good() && !i->eof() ) + return *const_cast (this); + else + return finalIterator; +}; + + +//! from Input Iterator +//! Returns the next object in the stream. +TokenIterator::operator string() const +{ + return buffer; +}; + + +//! from Input Iterator +//! Returns the next object in the stream. +TokenIterator::operator string() +{ + if( buffer== mooncheese ) + (*this)(); + return buffer; +}; + + +//! from Input Iterator +//! Returns the next object in the stream. +const string TokenIterator::operator*() const +{ + return buffer; +}; + + +//! from Input Iterator +//! Returns the next object in the stream. +const string TokenIterator::operator*() +{ + if( buffer== mooncheese ) + (*this)(); + return buffer; +}; + + +//! from Input Iterator +//! Preincrement. +TokenIterator& TokenIterator::operator++() +{ + (*this)(); return *this; +}; + + //! from Input Iterator + //! Postincrement. + //! this works .. almost + + +TokenIterator& TokenIterator::operator++(int i) +{ + static TokenIterator t = *this; + while(i>0){ --i; (*this)++; } + return t; +}; + + +/** compare not equal */ +bool TokenIterator::operator != (TokenIterator &R) const{// const & I say, const + // note: const TokenIterator &R will create a copy of R :-( + // this can't work; have to allow use of const in the above + // has to be compared differently( endflags .. ! ) + + return &R!= this; +} + +/** compare two Tokenizers */ +bool TokenIterator::operator == (TokenIterator &R) const{ + // note: const TokenIterator &R will create a copy of R :-( + // this can't work; have to allow use of const in the above + // has to be compared differently( endflags .. ! ) + + return !( *this != R ); +} + +/** need this for foreach template */ +bool TokenIterator::operator ! 
(void) const{ + return !( i && i->good() && !i->eof() ); +} + +/** need this for fun */ +bool TokenIterator::hastoken(void) const{ + return i && i->good() && !i->eof(); +} + + + +inline bool linefeed(char c, istream *i){ + if(c=='\r'){ + char d; + if( i->get(d) ){ + if(d=='\n') + ;/* dos line feed */ + else + i->unget(); + } + return true; + }else if(c=='\n'){ + char d; + if( i->get(d) ){ + if(d=='\r') + ;/* carriage return after line feed(?) */ + else + i->unget(); + } + return true; + } + return false; +} + + +string TokenIterator::operator()(){ + char c= 0; + int pos; + + previoussep= thesep; + buffer= string(""); + + while( i->get(c) ){ + + if(c==escapechar){ + + char d;// special translations need to be plugged in here + + if( i->get(d) ){ + if( brace && linefeed(d,i) ) + buffer+= '\n'; + else + buffer+= d; + } + } + + else if( linefeed(c,i) ){ + + thesep= eoltoken; + { + switch(mode){ + case Word: + if( previoussep!=whitetoken || buffer.length() )// space" = " + return buffer; + break; + case Line: + return buffer; + break; + case RFC: + { + char d; + if( i->get(d) ){ + if(!strchr(continuation,d) ){ + i->unget(); + return buffer; + }else + i->unget(); + + } + + do{ + if(!i->get(d)){ return buffer; } + }while( strchr(continuation,d) ); + + //should "A\n \tB" be returned as one token "AB" or as "A B" ? + // currently, "AB" is returned + i->unget();// unget + + } + break; + default: + return buffer; + } + } + } + else if( !(brace) && strchr(whitespace,c) ){ // brace>0 implies braces==true + + if(buffer.length()){ + thesep= whitetoken; + return buffer;// send token + }else + previoussep= whitetoken;// !? 
+ ;/* skip */ + + }else if(strchr(separator,c)){ + thesep= c; + if( previoussep!=whitetoken || buffer.length() )// space" = " + return buffer;// send token + + }else if(brace>0 && bracestack[brace]==c){ + + /* closing brace */ + braceoftoken= brace; + brace--; /* pop stack of braces */ + + thesep= c; + return buffer;// send token + + }else if( braces && (pos=mystrpos(leftbrace,c), pos>=0) ){//pos>0 + + /* opening brace */ + braceoftoken= brace; + bracestack[++brace]= rightbrace[pos]; + if( previoussep!=whitetoken || buffer.length() ){// space" = " + thesep= c; + return buffer;// send token + } + }else{ + /* normal, append to token */ + buffer+= c; + } + } + return buffer; +} + + +LexxStyleTokenIterator::LexxStyleTokenIterator(TokenIterator *Tbase){ + state=0; + base= Tbase; +} + +LexxStyleToken& LexxStyleTokenIterator::operator()(){ + state= !state; + thetoken.ttype= (LexxStyleToken::Tokentype)state; + if(state){ + thetoken.Tstring= (*base)(); + }else{ + thetoken.Tchar= base->thesep; + } + return thetoken; +} diff --git a/libtext/TokenIterator_test.cpp b/libtext/TokenIterator_test.cpp new file mode 100644 index 0000000..a4448b9 --- /dev/null +++ b/libtext/TokenIterator_test.cpp @@ -0,0 +1,222 @@ +/* + * $Source$ + * $Revision$ + * $Date$ + * + * Copyright (c) 1999 by CyberSolutions GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by CyberSolutions GmbH. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "text.hpp" +#include + + +// Set verbose=1 to see the tokens +const int verbose=1; + +/** + The do_sth_with_aTokenIterator class + was written with the intent + to have the class written to cout + using the for_each template + */ +class do_sth_with_aTokenIterator { + string s; +public: + explicit do_sth_with_aTokenIterator() : s() {}; + void operator()(const TokenIterator& s) + { cout<< *s < + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Peter Simons. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include diff --git a/libtext/easy_sprintf.c b/libtext/easy_sprintf.c index 5a83a7f..2027862 100644 --- a/libtext/easy_sprintf.c +++ b/libtext/easy_sprintf.c @@ -1,10 +1,39 @@ /* - * $Source$ - * $Revision$ - * $Date$ + * $Source$ + * $Revision$ + * $Date$ * - * Copyright (C) 1996,97 by CyberSolutions GmbH. - * All rights reserved. + * Copyright (c) 1996-99 by Peter Simons + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Peter Simons. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include diff --git a/libtext/find_next_line.c b/libtext/find_next_line.c index 9a25659..5ef4c59 100644 --- a/libtext/find_next_line.c +++ b/libtext/find_next_line.c @@ -1,10 +1,39 @@ /* - * $Source$ - * $Revision$ - * $Date$ + * $Source$ + * $Revision$ + * $Date$ * - * Copyright (C) 1996,97 by CyberSolutions GmbH. - * All rights reserved. + * Copyright (c) 1996-99 by Peter Simons + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Peter Simons. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "text.h" diff --git a/libtext/find_string.c b/libtext/find_string.c index 01db929..887dc2f 100644 --- a/libtext/find_string.c +++ b/libtext/find_string.c @@ -1,10 +1,39 @@ /* - * $Source$ - * $Revision$ - * $Date$ + * $Source$ + * $Revision$ + * $Date$ * - * Copyright (C) 1996 by CyberSolutions GmbH. - * All rights reserved. 
+ * Copyright (c) 1996-99 by Peter Simons + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Peter Simons. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include diff --git a/libtext/test.txt b/libtext/test.txt new file mode 100644 index 0000000..31f0c64 --- /dev/null +++ b/libtext/test.txt @@ -0,0 +1,14 @@ +Ein Wort ist kein Satz, + aber ein Satz ist kein Ersatz für ein Wort. + "Das Wort" ist der "Ein Satz" im "Vor Wort". 
+Dagegen kommt das "Vor Spiel" vor dem Spiel, + obwohl das eigentliche "Kommen" im Spiel stattfindet. +So gesehen, ist das "Vor Spiel" ein richtiges Spiel, + während das sogenannte Spiel der "Volle Ernst" ist. +Andererseits, wenn Ernst voll ist, findet meist weder "Vor Spiel" noch ein Spiel statt. +Dieses Spiel hatte dann schon vorher stattgefunden; + Meistens war es dann ein Lokaltermin des "Fußball Clubs Bayern". +Darauf folgte ein Termin im Lokal; Nach dem Fußballspiel zog Ernst das Lokal + der "Oben ohne Bar" vor; +Denn in der Bar ging nichts ohne, weder ohne das eine, noch ohne unten. +Des weiteren hatte ein Besuch in der Bar für Ernst meist ein "Nach Spiel" zu Hause. diff --git a/libtext/text.h b/libtext/text.h index e64a2da..1c05819 100644 --- a/libtext/text.h +++ b/libtext/text.h @@ -1,10 +1,39 @@ /* - * $Source$ - * $Revision$ - * $Date$ + * $Source$ + * $Revision$ + * $Date$ * - * Copyright (C) 1996,97 by CyberSolutions GmbH. - * All rights reserved. + * Copyright (c) 1996-99 by Peter Simons + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Peter Simons. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __LIB_TEXT_H__ @@ -19,17 +48,19 @@ /********** Useful defines and declarations **********/ +#ifndef __cplusplus #ifndef __HAVE_DEFINED_BOOL__ # define __HAVE_DEFINED_BOOL__ 1 typedef int bool; #endif + #ifndef FALSE # define FALSE (0==1) #endif #ifndef TRUE # define TRUE (1==1) #endif - +#endif enum { TEXT_REGEX_OK = 0, TEXT_REGEX_ERROR, diff --git a/libtext/text.hpp b/libtext/text.hpp new file mode 100644 index 0000000..4e86c90 --- /dev/null +++ b/libtext/text.hpp @@ -0,0 +1,539 @@ +/* + * $Source$ + * $Revision$ + * $Date$ + * + * Copyright (c) 1999 by CyberSolutions GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by CyberSolutions GmbH. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __LIB_TEXT_HPP__ +#define __LIB_TEXT_HPP__ + +#include +#include +#include +#include +#include +#include +#include "../RegExp/RegExp.hpp" + +/** \file text.hpp + + A library for text parsing and manipulation. + + This library contains a couple of useful functions for dealing + with strings, most notably a regular expression class and a + generic config file parser. 
+*/ + +////////////////////////////////////////////////// +// tokenize() // +////////////////////////////////////////////////// + +template +void tokenize(insert_iterator & ii, const string & buffer, + const char * sep = " \t\r\n") +{ + string::size_type pos = 0; + while(pos != string::npos) { + string::size_type end_pos = buffer.find_first_of(sep, pos); + string token = buffer.substr(pos, end_pos-pos); + if (!token.empty()) { + *ii = token; + ++ii; + end_pos = buffer.find_first_not_of(sep, end_pos); + } + if (end_pos != string::npos) + end_pos = buffer.find_first_not_of(sep, end_pos); + pos = end_pos; + } +} + +////////////////////////////////////////////////// +// RegexTokenizer() // +////////////////////////////////////////////////// + + +/** The RegexTokenizer extracts tokens from 'string' input. + + string or stream input has to be converted to string. This means + the Tokenizer should be useful with large input which is divided + into large chunks. A match is performed against a list of regular + expressions. Each expression defines a match-separator pair. + Regular Expressions are compiled with REG_EXTENDED flag. +*/ + +class RegexTokenizer: forward_iterator { +public: + + /** maximum number of registers, subexpressions */ + static const int N_pm=10; + + /** maximum length of a match */ + static const int N_substring=1024; + + /** the workspace */ + static char workspace[N_substring+1]; //+1 for trailing \0 + + /** Modes (other than Custom) make the \a RegexTokenizer use a standard regular expression. + + \a Custom : The tokenizer uses the regular expression you specify. + + \a Word : The tokenizer gives chunks of input separated by space and tabs. + + \a Line : The tokenizer splits input at end of line. + + \a RFC : The tokenizer splits input at end of line. + Lines may be continued by starting a new line with spaces or tabs. + These continuation characters are NOT stripped from the tokens. 
+ + */ + enum Mode {Custom, Word, Line, RFC}; + + /** RegexTokenizer is its own iterator. */ + typedef RegexTokenizer iterator; +private: + string input; + string result[N_pm]; + listregex_src;// the source regexes needed for copy/begin/end + listregex; // not sure multiple regexes are a smart idea + int whichregexwasmatched; + regmatch_t pm[N_pm]; + int I_pm; // matched subexpressions + int error; // result of regex calls + int so,eo,previous_eo; // positions + //int matchMask;//bitset; which fields to return by the * operator +protected: + Mode mode; + void advance(); + void reset(); + int set(string _input,list _regex); +public: + /** default constructor. */ + RegexTokenizer(); + + /** Tokenize a string in a mode. */ + RegexTokenizer(string _input,Mode _mode); + + /** Tokenize a string according to a single regular expression. */ + RegexTokenizer(string _input,const char* oneregex); + + /** Tokenize a string according to several regular expressions. + (If the first regular expression fails, the next one will be tried.) + */ + RegexTokenizer(string _input,list _regex); + + /** copy constructor */ + RegexTokenizer(const RegexTokenizer &r); + + //void selectFields(int m){ matchMask= m; } + + /** The begin state */ + RegexTokenizer begin() const; + + /** The end state */ + RegexTokenizer end() const; + + /** + from Input Iterator + Returns the current token. + */ + const string operator*() const + { return result[0]; }; + + + /** from Input Iterator + Returns the i-th matched subexpression.
+ */ + const string operator[](int i) const + { return result[i]; }; + + /** from Input Iterator + PreIncrement + */ + RegexTokenizer& operator++() + { (*this).advance(); return *this; }; + + /** from Input Iterator + PostIncrement + */ + RegexTokenizer& operator++(int i) + { while(i>0){ (*this).advance(); --i; }; return *this; }; + + /** Destructor */ + virtual ~RegexTokenizer(); + + /** compare not equal */ + bool operator != (const RegexTokenizer &R) const{// const & I say, const + return so != R.so || eo != R.eo || previous_eo != R.previous_eo; + } + + /** compare two RegexTokenizers */ + bool operator == (const RegexTokenizer &R) const{ + return !( *this != R ); + } + + /** print the current state of the RegexTokenizer */ + friend ostream& operator<<(ostream &o,const RegexTokenizer &r); +}; + + +////////////////////////////////////////////////// +// TokenIterator // +////////////////////////////////////////////////// + + +/** The TokenIterator extracts tokens from string or stream input. + + There are four main modes and a custom mode. In all modes, the + backslash works as an escape character for the next character i.e. + 'one\\\\backslash' is read as 'one\backslash'. + + Description of the main modes: + + 1. Words separated by whitespace, with "whitespace" consisting of + tabulators and the blank. + \code + TokenIterator tokenize(inputStr,TokenIterator::Word); + \endcode + + 2. Words separated by whitespace, "one word" is one token. + whitespace is defined to be only tabulators and the blank. + \code + TokenIterator tokenize(inputStr,TokenIterator::Word,true); + \endcode + + 3. Each line is a token. + Escaped newlines will become part of the token. + example: + \code + TokenIterator tokenize(inputStr,TokenIterator::Line); + \endcode + + 4. RFC style: + Whitespace at start of next line appends next line. + The use of escaping the newline to append the next line, + like in Makefiles, is NOT part of this mode. 
+ example: + \code + TokenIterator tokenize(inputStr,TokenIterator::RFC); + \endcode + + 5. The Custom Mode: The custom mode is intended for reading from + data that is in almost human-readable-format, like /etc/passwd. + Separating elements are not returned as Tokens, but are stored in + thesep and previoussep. In /etc/passwd ':' is the separator, + while newlines separate records. + \code + class MyCustomTokenIterator: public TokenIterator{ + public: + + MyCustomTokenIterator(string inputStr, bool b=false) + : TokenIterator(inputStr,TokenIterator::Custom, b){ + eoltoken= '\n'; + separator= ":\n"; + }; + + MyCustomTokenIterator(istream &inputStr, bool b=false) + : TokenIterator(inputStr,TokenIterator::Custom, b){ + eoltoken= '\n'; + separator= ":\n"; + }; + \endcode + See \a CustomTokenIterator.cpp for the full example. + + Bugs (Custom Mode): Does not recognize a separator preceded by whitespace + Instead, the tokenizer will collapse a series of whitespace, but + will offer it as a separator in thesep. + This is probably not what you want. +*/ + + +class TokenIterator:istream_iterator { + +private: + istream *i; + bool ismyistream; + string buffer; + + static TokenIterator finalIterator; + + static string mooncheese; + +public: + /** \relates TokenIterator + The modes allowed as arguments. + */ + enum Mode {Word, Line, RFC, Custom}; + + typedef TokenIterator iterator; + +protected: + int brace; + int braceoftoken; + string bracestack; + bool braces; + Mode mode; + + const char *whitespace; // ALL whitespace must be listed here + const char *separator; // separators + const char *continuation;// lists continuation + const char *leftbrace; // leftbrace[i] matches rightbrace[i] + const char *rightbrace; // supports multiple levels of braces + char escapechar;// escapechar is the escape char; default \ . 
+ char eoltoken; // use this instead of end of line + char whitetoken;// use this instead of whitespace + + void setMode(Mode m); + void reset(); + +public: + /** + Returns one token each call. + An empty token does NOT signal the end of the input. + */ + virtual string operator()(); + + /** Dummy constructor */ + /** constructs an Iterator that has reached end */ + TokenIterator(); + + /** Constructor used to tokenize a string s, + using \a Mode m (default is Word), + by default without braces. + */ + TokenIterator(string s, Mode m=Word, bool braces=false); + + /** Constructor used to tokenize from an input stream, + using \a Mode m (default is Word), + by default without braces. + + The input stream is consumed, which is why + the TokenIterator doesn't offer backward iterator capabilities. + */ + TokenIterator(istream &is, Mode m=Word, bool braces=false); + + + /** A begin function returning an iterator. + \a begin and \a end functions have been crafted to + work with this way of using iterators: + \code + ifstream is(somefilename); + TokenIterator tokenize(is); + + while( tokenize.begin() != tokenize.end() ){ + string token= tokenize(); + ... + } + \endcode + */ + iterator& begin() const; + + + /** An end function returning an iterator. See \a begin . + */ + inline iterator& end() const{ return finalIterator; }; + + + virtual ~TokenIterator(); + + //! from Input Iterator + //! Returns the current object in the stream. + operator string() const; + + + //! from Input Iterator + //! Returns the current object in the stream, + //! and the next object if the stream hasn't been read yet + operator string(); + + + //! from Input Iterator + //! Returns the current object in the stream. + const string operator*() const; + + + //! from Input Iterator + //! Returns the current object in the stream, + //! and the next object if the stream hasn't been read yet + const string operator*(); + + + //! from Input Iterator + //! Preincrement. + TokenIterator& operator++(); + + //!
from Input Iterator + //! Postincrement. + //! this works .. almost + TokenIterator& operator++(int i); + + + /** compare not equal */ + bool operator != (TokenIterator &R) const; + + + /** compare two Tokenizers */ + bool operator == (TokenIterator &R) const; + + + /** need this for foreach template */ + bool operator ! (void) const; + + + /** Introducing an implicit conversion to bool is not */ + /** good because it creates an ambiguity, */ + /** since bool may be converted implicitly to int and String. */ + bool hastoken (void) const; + + + + /** contains the separator that ended the token */ + char thesep; + + /** holds the separator that preceded the token */ + char previoussep; + + /** when using braces (in custom mode), + check this to get the number of unclosed braces. */ + inline int bracingdepth() const{ return braceoftoken; }; + + /** use this to compare with instead of end of line \\n */ + inline char eolToken() const{ return eoltoken; }; + + /** use this to compare with instead of space */ + inline char whiteToken() const{ return whitetoken; }; +}; +/** \example TokenIterator_test.cpp */ +/** \example CustomTokenIterator.cpp */ + + +/** + The LexxStyleToken is returned by the \a LexxStyleTokenIterator +\code + struct LexxStyleToken{ + enum Tokentype {T1_separator, T1_string}; + Tokentype ttype; + string Tstring; + char Tchar; + }; +\endcode +*/ +struct LexxStyleToken{ + enum Tokentype {T1_separator, T1_string}; + Tokentype ttype; + string Tstring; + char Tchar; +}; + +/** + The \a LexxStyleTokenIterator is a wrapper around the + \a TokenIterator . It returns the separators and the parts + of the string that are separated by the separators + in alternation. +*/ +class LexxStyleTokenIterator{ +private: + TokenIterator *base; + int state; +public: + /** + Return the current token, + without proceeding to the next token. + */ + LexxStyleToken thetoken; + + /** + Wrap the TokenIterator in the LexxStyleTokenIterator.
+ */ + LexxStyleTokenIterator(TokenIterator *Tbase); + + /** + Return the next token. + */ + LexxStyleToken& operator()(); +}; + + +/** + \a crop_token removes leading and trailing whitespace (blank and tab by default) from a token; an empty or all-whitespace token yields an empty string. + Example: + \code + cout << crop_token( " \thead tail \t" ) << endl; // prints "head tail" + \endcode +*/ + +inline string crop_token(const string &s, const string whitespace=string(" \t") ){ + size_t left = s.find_first_not_of(whitespace.c_str()); + size_t right= s.find_last_not_of(whitespace.c_str()); + return left==string::npos ? string() : string(s,left,right-left+1); /* npos: nothing but whitespace, avoid out_of_range */ +}; + + +/** \a text_escape escapes newlines and escape characters + inside a string such that it may be read by the \a TokenIterator + in \a TokenIterator::Line or \a TokenIterator::Word Mode. +*/ +inline string text_escape(const string &lines) +{ + unsigned int count= 0; + + // + // count how many characters have to be escaped + // + for( unsigned int i=0; i +#include +#include + +#include "text.hpp" + +int +main(int argc, char ** argv) +{ + // + // Test the tokenizer. + // + list l; + insert_iterator< list > ii(l, l.end()); + const string buf("this is a test\n\n\n\r\tskfj \t blax\n"); + tokenize(ii, buf); + cout << "Found " << l.size() << " tokens." << endl; + if( l.size()!=6 ) + return 1; + copy(l.begin(), l.end(), ostream_iterator(cout, "\n")); + + return 0; +} diff --git a/libtext/transform_text.c b/libtext/transform_text.c index 717f97c..94293a6 100644 --- a/libtext/transform_text.c +++ b/libtext/transform_text.c @@ -1,10 +1,39 @@ /* - * $Source$ - * $Revision$ - * $Date$ + * $Source$ + * $Revision$ + * $Date$ * - * Copyright (C) 1996 by CyberSolutions GmbH. - * All rights reserved. + * Copyright (c) 1996-99 by Peter Simons + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Peter Simons. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include diff --git a/libtext/wordwrap.c b/libtext/wordwrap.c index 1d4d1f7..7f9857f 100644 --- a/libtext/wordwrap.c +++ b/libtext/wordwrap.c @@ -1,10 +1,39 @@ /* - * $Source$ - * $Revision$ - * $Date$ + * $Source$ + * $Revision$ + * $Date$ * - * Copyright (C) 1996,97 by CyberSolutions GmbH. - * All rights reserved. + * Copyright (c) 1996-99 by Peter Simons + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Peter Simons. + * + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include