From 11bb6bc1cabf635b635dc0e637f81ca90d0748cf Mon Sep 17 00:00:00 2001
From: Peter Simons <simons@cryp.to>
Date: Wed, 13 Dec 2000 15:45:25 +0000
Subject: [PATCH] Imported latest version.

---
 liblists/Makefile                |  69 ----
 liblists/SMakefile               |  15 +
 liblists/configure.in            |  33 ++
 liblists/lists.c                 |  43 ++-
 liblists/{lists.h => lists.h.in} |  44 ++-
 libtext/CustomTokenIterator.cpp  |  59 ++++
 libtext/Makefile                 |  87 -----
 libtext/RegexTokenizer.cpp       | 259 +++++++++++++++
 libtext/RegexTokenizer_test.cpp  | 234 ++++++++++++++
 libtext/SMakefile                |  13 +
 libtext/TokenIterator.cpp        | 380 ++++++++++++++++++++++
 libtext/TokenIterator_test.cpp   | 222 +++++++++++++
 libtext/configure.in             |  28 ++
 libtext/easy_pattern_match.c     |  39 ++-
 libtext/easy_sprintf.c           |  39 ++-
 libtext/find_next_line.c         |  39 ++-
 libtext/find_string.c            |  39 ++-
 libtext/test.txt                 |  14 +
 libtext/text.h                   |  43 ++-
 libtext/text.hpp                 | 539 +++++++++++++++++++++++++++++++
 libtext/tokenizer_test.cpp       |  60 ++++
 libtext/transform_text.c         |  39 ++-
 libtext/wordwrap.c               |  39 ++-
 23 files changed, 2172 insertions(+), 204 deletions(-)
 delete mode 100644 liblists/Makefile
 create mode 100644 liblists/SMakefile
 create mode 100644 liblists/configure.in
 rename liblists/{lists.h => lists.h.in} (51%)
 create mode 100644 libtext/CustomTokenIterator.cpp
 delete mode 100644 libtext/Makefile
 create mode 100644 libtext/RegexTokenizer.cpp
 create mode 100644 libtext/RegexTokenizer_test.cpp
 create mode 100644 libtext/SMakefile
 create mode 100644 libtext/TokenIterator.cpp
 create mode 100644 libtext/TokenIterator_test.cpp
 create mode 100644 libtext/configure.in
 create mode 100644 libtext/test.txt
 create mode 100644 libtext/text.hpp
 create mode 100644 libtext/tokenizer_test.cpp

diff --git a/liblists/Makefile b/liblists/Makefile
deleted file mode 100644
index 77b2fce..0000000
--- a/liblists/Makefile
+++ /dev/null
@@ -1,69 +0,0 @@
-#
-# liblists Makefile
-#
-# $Header$
-#
-
-# Make Rules:
-# ===========
-#
-.c.o:
-	$(CC) $(CFLAGS) $(CPPFLAGS) -c $<
-
-
-# Compiler Flags:
-# ===============
-#
-CFLAGS  = -Wall
-CPPFLAGS=
-
-
-#
-# Labels:
-# =======
-#
-SRCS 	= lists.c
-OBJS	= $(SRCS:.c=.o)
-MANFILES= $(SRCS:.c=.3)
-
-
-#
-# Targets
-#
-.PHONY: all man clean realclean distclean depend
-
-all:	liblists.a
-
-man:	InitList.3
-
-clean:
-	rm -f liblists.a *.o *.3 *.core
-
-realclean:	clean
-	rm -rf man3
-
-distclean:	realclean
-
-depend:
-	makedepend -Y /usr/include $(SRCS)
-	@rm -f Makefile.bak
-
-InitList.3:	lists.c
-	c2man -ilists.h -g lists.c
-
-
-#
-# Actions
-#
-liblists.a:	$(OBJS)
-	rm -f $@
-	$(AR) cr $@ $(OBJS)
-	$(RANLIB) $@
-
-
-#
-# Dependencies
-#
-# DO NOT DELETE
-
-lists.o: lists.h
diff --git a/liblists/SMakefile b/liblists/SMakefile
new file mode 100644
index 0000000..f39dde4
--- /dev/null
+++ b/liblists/SMakefile
@@ -0,0 +1,15 @@
+#
+# Skeleton Makefile -- process with smake to create real Makefile.
+#
+# $Header$
+#
+.opt -o GNUmakefile.in
+
+LIBRARY		= lists
+SRCS		= lists.c
+
+.include <library.smk>
+.include <autoconf.base.smk>
+# lists.h is generated from lists.h.in by configure (AC_CONFIG_HEADER), so distclean must delete it.
+distclean-local::
+	rm -f lists.h
diff --git a/liblists/configure.in b/liblists/configure.in
new file mode 100644
index 0000000..e32c251
--- /dev/null
+++ b/liblists/configure.in
@@ -0,0 +1,33 @@
+dnl
+dnl configure.in -- Process this file with autoconf to produce a configure script.
+dnl
+dnl $Header$
+dnl
+
+AC_INIT(lists.h.in)
+
+dnl Checks for paths and programs.
+dnl
+AC_PROG_CC
+AC_PROG_RANLIB
+
+dnl Check for optional header files.
+dnl
+AC_CHECK_HEADERS(strings.h)
+
+dnl Enable warning flags for gcc.
+dnl
+if test "$GCC" = yes; then
+    CFLAGS="$CFLAGS -Wall -pedantic"
+    CXXFLAGS="$CXXFLAGS -Wall -pedantic"
+fi
+
+dnl Remove '-g' and '-O2' from the compile flags.  Only whole words are
+dnl removed, so that options such as '-ggdb' or '-fno-gcse' are not mangled.
+CFLAGS=`echo " $CFLAGS " | sed -e "s/ -g / /g" -e "s/ -O2 / /g"`
+CXXFLAGS=`echo " $CXXFLAGS " | sed -e "s/ -g / /g" -e "s/ -O2 / /g"`
+
+dnl Write results.
+dnl
+AC_CONFIG_HEADER(lists.h)
+AC_OUTPUT(GNUmakefile)
diff --git a/liblists/lists.c b/liblists/lists.c
index 1b981d9..e823b67 100644
--- a/liblists/lists.c
+++ b/liblists/lists.c
@@ -1,10 +1,39 @@
 /*
- *      $Source$
- *      $Revision$
- *      $Date$
+ * $Source$
+ * $Revision$
+ * $Date$
  *
- *      Copyright (C) 1996,97 by CyberSolutions GmbH.
- *      All rights reserved.
+ * Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Peter Simons.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include "lists.h"
@@ -114,7 +143,7 @@ isEndOfList(const Node node)
 
     assert(node != NULL);
     if (node == NULL)
-      return NULL;
+      return 1;
 
     return (node->ln_Next == NULL);
 }
@@ -418,7 +447,7 @@ CountElements(List head)
 
     assert(head != NULL);
     if (head == NULL)
-      return NULL;
+      return 0;
 
     for (i = 0, node = getFirstNode(head); node != NULL; node = getNextNode(node))
       i++;
diff --git a/liblists/lists.h b/liblists/lists.h.in
similarity index 51%
rename from liblists/lists.h
rename to liblists/lists.h.in
index 83aaf0d..5a731ec 100644
--- a/liblists/lists.h
+++ b/liblists/lists.h.in
@@ -1,15 +1,47 @@
 /*
- *      $Source$
- *      $Revision$
- *      $Date$
+ * $Source$
+ * $Revision$
+ * $Date$
  *
- *      Copyright (C) 1996,97 by CyberSolutions GmbH.
- *      All rights reserved.
+ * Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Peter Simons.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef __LIB_LISTS_H__
 #define __LIB_LISTS_H__ 1
 
+/* For autoconf... */
+#undef HAVE_STRINGS_H
+
 #include <stdlib.h>
 #include <string.h>
 #ifdef HAVE_STRINGS_H		/* sunos 4 needs this */
@@ -22,6 +54,7 @@
 
 /********** Useful defines and declarations **********/
 
+#ifndef __cplusplus
 #ifndef __HAVE_DEFINED_BOOL__
 #  define __HAVE_DEFINED_BOOL__ 1
 typedef int bool;
@@ -32,6 +65,7 @@ typedef int bool;
 #ifndef TRUE
 #  define TRUE (1==1)
 #endif
+#endif
 
 /********** Structures **********/
 
diff --git a/libtext/CustomTokenIterator.cpp b/libtext/CustomTokenIterator.cpp
new file mode 100644
index 0000000..dff219f
--- /dev/null
+++ b/libtext/CustomTokenIterator.cpp
@@ -0,0 +1,59 @@
+#include "text.hpp"
+#include <fstream.h>
+
+/**
+  Example using the Custom mode of TokenIterator: both constructors select
+  TokenIterator::Custom and then set eoltoken, separator and whitespace. */
+class MyCustomTokenIterator: public TokenIterator{
+public:
+
+    MyCustomTokenIterator(string inputStr, bool b=false)	// tokenize a string
+	: TokenIterator(inputStr,TokenIterator::Custom, b){
+	eoltoken= '\n';		// a newline is the end-of-line token
+	separator= ":\n";	// presumably a set of separator characters (':' and newline) -- confirm in text.hpp
+	whitespace= "";		// presumably: no character counts as whitespace -- confirm in text.hpp
+    };
+
+    MyCustomTokenIterator(istream &inputStr, bool b=false)	// same setup, reading from a stream
+	: TokenIterator(inputStr,TokenIterator::Custom, b){
+	eoltoken= '\n';
+	separator= ":\n";
+	whitespace= "";
+    };
+};
+
+
+int main(int argc, char* argv[]){
+    ifstream infile("/etc/passwd");
+    // An ifstream that failed to open never reports eof(), so without this
+    // check the read loop below would never terminate.
+    if(!infile){ cerr<<"cannot open /etc/passwd"<<endl; return 1; }
+    MyCustomTokenIterator tokenize(infile);
+    while(!infile.eof()){
+	string user= tokenize();	// the seven fields of one record, in file order
+	string password= tokenize();
+	string userid = tokenize();
+	string groupid= tokenize();
+	string description= tokenize();
+	string home = tokenize();
+	string shell= tokenize();
+	if(password=="x") password="shadowed";
+
+	cout << "----"<<endl;
+	cout << "user       :" <<user <<endl;
+	cout << "password   :" <<password <<endl;
+	cout << "userid     :" <<userid <<endl;
+	cout << "groupid    :" <<groupid <<endl;
+	cout << "description:" <<description <<endl;
+	cout << "home       :" <<home <<endl;
+	cout << "shell      :" <<shell <<endl;
+
+	while( !infile.eof() && tokenize.thesep!= tokenize.eolToken() ){	// surplus fields, up to end of line
+	    string trailing_garbage = tokenize();
+	    cout<<"\\:"<< trailing_garbage;
+	}
+	cout<<endl;
+    }
+    return 0;
+}
+
diff --git a/libtext/Makefile b/libtext/Makefile
deleted file mode 100644
index 573118f..0000000
--- a/libtext/Makefile
+++ /dev/null
@@ -1,87 +0,0 @@
-#
-# Text routine library
-#
-# $Header$
-#
-
-# Make Rules:
-# ===========
-#
-.SUFFIXES: .3
-
-.c.o:
-	$(CC) $(CFLAGS) $(CPPFLAGS) -c $<
-
-
-# Compiler flags:
-# ===============
-#
-CFLAGS	= -Wall
-CPPFLAGS=
-
-# Linker flags:
-# =============
-#
-LDFLAGS	=
-LIBS	=
-
-
-#
-# Labels:
-# =======
-#
-SRCS	= easy_pattern_match.c find_next_line.c find_string.c easy_sprintf.c \
-	  transform_text.c wordwrap.c
-OBJS	= $(SRCS:.c=.o)
-MANFILES= text_easy_pattern_match.3 text_find_string.3 text_transform_text.3 \
-	  text_find_next_line.3 text_easy_sprintf.3 text_wordwrap.3
-
-
-#
-# Targets
-#
-.PHONY: all man clean depend
-
-all:	libtext.a
-
-man:	$(MANFILES)
-
-clean:
-	rm -f libtext.a test $(OBJS)
-	rm -f *.o *.3 *.core *.bak
-
-depend:
-	makedepend -Y /usr/include $(SRCS)
-	@rm -f Makefile.bak
-
-
-#
-# Actions:
-#=========
-#
-libtext.a:	$(OBJS)
-	$(AR) cr $@ $(OBJS)
-	$(RANLIB) $@
-
-text_easy_pattern_match.3:	easy_pattern_match.c text.h
-	c2man -itext.h easy_pattern_match.c
-
-text_find_string.3:		find_string.c text.h
-	c2man -itext.h find_string.c
-
-text_transform_text.3:		transform_text.c text.h
-	c2man -itext.h transform_text.c
-
-text_find_next_line.3:		find_next_line.c text.h
-	c2man -itext.h find_next_line.c
-
-text_easy_sprintf.3:		easy_sprintf.c text.h
-	c2man -itext.h easy_sprintf.c
-
-text_wordwrap.3:		wordwrap.c text.h
-	c2man -itext.h wordwrap.c
-
-
-#
-# Dependencies
-#
diff --git a/libtext/RegexTokenizer.cpp b/libtext/RegexTokenizer.cpp
new file mode 100644
index 0000000..64e9997
--- /dev/null
+++ b/libtext/RegexTokenizer.cpp
@@ -0,0 +1,259 @@
+/*
+ * $Source$
+ * $Revision$
+ * $Date$
+ *
+ * Copyright (c) 1999 by CyberSolutions GmbH, Germany.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by CyberSolutions GmbH.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "text.hpp"
+
+char RegexTokenizer::workspace[RegexTokenizer::N_substring+1]="";  // static C-string scratch buffer handed to regexec() by advance()
+
+// Default constructor: reset() gives the offset and error fields defined values instead of garbage.
+RegexTokenizer::RegexTokenizer(){ reset(); }
+
+// Return to the pristine state: empty input, no recorded match, no error.
+void RegexTokenizer::reset(){
+  input= string();
+  for(int i=0; i<N_pm; ++i){ pm[i].rm_so=-1; pm[i].rm_eo=-1; }  // every slot, pm[0] included
+  so= eo= 0;
+  previous_eo= -1;   // below eo, which advance() requires before it tries to match
+  error= 0;
+}
+
+// Load a new input string and compile every pattern in _regex.
+// Returns 0 on success, otherwise the regcomp() error code of the first
+// pattern that failed to compile (patterns before it stay loaded).
+int RegexTokenizer::set(string _input,list<const char*> _regex){
+  reset();
+  input= _input;
+
+  list<const char*>::iterator first= _regex.begin();
+  list<const char*>::iterator last = _regex.end();
+
+  for(; first!=last; ++first){
+    regex_t re;
+
+    // REG_EXTENDED: use extended regular expressions.
+    // REG_NEWLINE:  makes ^...$ work to match newline/endofline.
+    int rc= regcomp (&re, *first, REG_EXTENDED|REG_NEWLINE);
+    if(rc)
+      return rc;
+    regex.push_back(re);         // compiled form, released by the destructor
+    regex_src.push_back(*first); // pattern pointer, reused by the copy constructor
+  }
+  // Success.  The function used to fall off the end here, which is
+  // undefined behaviour in a function declared to return int.
+  return 0;
+}
+
+// Build a tokenizer for one of the predefined modes.  Custom (or any
+// unknown mode) has no built-in pattern: a warning goes to cerr and the
+// tokenizer is set up without any regular expression.
+RegexTokenizer::RegexTokenizer(string _input,Mode _mode){
+  mode= _mode;
+
+  const char* pattern= 0;
+  if(_mode==Word)
+    pattern= "([^ \t\n]*)([ \t\n]*)";
+  else if(_mode==Line)
+    pattern= "^(.*)$\n";
+  else if(_mode==RFC)
+    // this works, but output is confusing
+    // that is, how to remove the glue ?
+    pattern= "((^.*$)((\n)^[ \t]+.*$)*)(\n)?";
+
+  list<const char*>alist;
+  if(pattern)
+    alist.push_back(pattern);
+  else
+    cerr<<"RegexTokenizer mode constructor called with pointless mode."<<endl;
+  set(_input,alist);
+}
+
+// Tokenize _input with a single caller-supplied pattern.
+RegexTokenizer::RegexTokenizer(string _input,const char* oneregex){
+  mode= Custom;  // used to stay indeterminate although the copy constructor reads it
+  list<const char*>alist; alist.push_back(oneregex);  // one-element pattern list
+  set(_input,alist);
+}
+
+RegexTokenizer::RegexTokenizer(string _input,list<const char*> _regex){  // tokenize with caller-supplied patterns
+  mode= Custom; set(_input,_regex);  // mode used to stay indeterminate although the copy constructor reads it
+}
+
+// Copy constructor.  The compiled patterns are not copied; set() recompiles
+// them from r.regex_src, so every object frees only its own regex_t values.
+RegexTokenizer::RegexTokenizer(const RegexTokenizer &r){
+  set(r.input,r.regex_src);
+
+  // result[] holds string objects (advance() assigns strings to it), so it
+  // has to be copied by assignment; memcpy() on such objects is undefined.
+  for(int i=0; i<N_pm; ++i)
+    result[i]= r.result[i];
+
+  // pm[] holds plain regmatch_t structs, for which memcpy() is fine.
+  memcpy(&pm[0], &r.pm[0], N_pm*sizeof(pm[0]) );
+  whichregexwasmatched= r.whichregexwasmatched;
+  I_pm= r.I_pm;
+  error= r.error;
+  so= r.so;  eo= r.eo;
+  previous_eo= r.previous_eo;
+  mode= r.mode;
+}
+
+
+// Start iterator: a copy of this tokenizer with the scan offsets rewound
+// and the error flag cleared.
+RegexTokenizer RegexTokenizer::begin() const
+    {
+      RegexTokenizer start(*this);
+      start.previous_eo= -1;
+      start.so= start.eo= 0;
+      start.error= 0;
+      return start;
+    }
+
+// Past-the-end iterator: a copy with every offset at input.length() and error raised.
+RegexTokenizer RegexTokenizer::end() const
+    {
+      RegexTokenizer stop(*this);
+      stop.eo= input.length();
+      stop.so= input.length();
+      stop.previous_eo= stop.eo;
+      stop.error= 1;
+      return stop;
+    }
+
+void RegexTokenizer::advance(){
+  // Move on to the next token: try the patterns in order until one matches
+  // the text at offset eo, store the match and its submatches in result[]
+  // and update so/eo.  Otherwise move the offsets to the end of the input.
+  //
+  // regexec() needs a C string, so up to N_substring characters of the
+  // input, starting at eo, are first copied into the static workspace.
+  if(eo < (signed int)input.size()){
+    // there is no c_substr(eo,N_substring)  ;-(
+    string sWorkspace(input,eo,N_substring);
+    // at most N_substring of workspace's N_substring+1 bytes are written, so it stays terminated
+    strncpy(workspace, sWorkspace.c_str(), N_substring) ;
+  }
+  else
+    workspace[0]='\0';
+
+  result[0]= string();
+
+  if(
+     error == 0 && /* regex ok ? */
+     *workspace != 0 && /* check end buffer */
+     previous_eo < eo /* make sure we finish */
+     )
+    {/* while matches found */
+      // error doubles as the loop flag: it stays non-zero until a pattern matches
+      list<regex_t>::iterator first= regex.begin();
+      list<regex_t>::iterator last = regex.end();
+      error= 1;
+
+      previous_eo= eo;
+      while(error && result[0].empty() && first!=last){//check for empty buffer
+	{
+	  // run the next pattern over the workspace copy
+
+	  /* substring found between pm.rm_so and pm.rm_eo */
+	  /* This call to regexec() finds the next match */
+	  error = regexec(&*first, workspace, N_pm, &pm[0], 0);
+	  ++first;
+	}
+
+	if(!error){
+	  int final_so= eo;
+	  int final_eo= eo;
+	  //Go over the members of pm to see submatches
+	  int i;
+	  i=N_pm; while(--i>0){ result[i]= string(); }
+	  i=0;
+	  while(i<N_pm &&
+		pm[i].rm_so>=0 && pm[i].rm_eo>0 &&
+		pm[i].rm_so<N_substring && pm[i].rm_eo<=N_substring
+		){
+	    int local_so= previous_eo+pm[i].rm_so;  // workspace offset -> input offset
+	    int local_eo= previous_eo+pm[i].rm_eo;
+	    if(i==0)
+	      {
+		final_so= local_so;
+		final_eo= local_eo;
+	      }
+	    result[i]= input.substr(local_so, local_eo-local_so);
+	    //cout <<"match["<<i<<"]{"<<pm[i].rm_so<<","<<pm[i].rm_eo<<"}";
+	    //cout <<"("<< local_so <<","<< local_eo <<"): " << result[i] << endl;
+
+	    i++;
+	  }
+	  so= final_so;
+	  eo= final_eo;
+	  I_pm= i;
+	}
+	// No regerror() on a failed match: it used to be handed the *next*
+	// list element (possibly end()) and to write its message into workspace,
+	// clobbering the text the remaining patterns still have to be tried on.
+      }
+    }else{
+      // Nothing (more) to match: move every offset to the end of the input,
+      // which is where end() puts them -- presumably so that a comparison
+      // with end() succeeds (like in *this= end();).
+      so= input.length();
+      eo= input.length();
+      previous_eo= eo;
+
+    }
+
+}
+
+// Destructor: release every compiled pattern held in the regex list.
+RegexTokenizer::~RegexTokenizer(){
+  list<regex_t>::iterator it= regex.begin();
+  list<regex_t>::iterator stop= regex.end();
+
+  for(; it!=stop; ++it){
+    // regfree() returns nothing, so there is no status to check.
+    regfree(&*it);
+  }
+}
+
+ostream& operator<<(ostream &o, const RegexTokenizer &r){  // debug dump of the scan state, newline-terminated
+  o<<"("<<&r<<" "<<r.previous_eo<<"-"<<r.so<<"/"<<r.eo;
+  return o<<" ?"<<r.error<<")["<<r.input<<"]"<<endl;
+}
+
diff --git a/libtext/RegexTokenizer_test.cpp b/libtext/RegexTokenizer_test.cpp
new file mode 100644
index 0000000..6778033
--- /dev/null
+++ b/libtext/RegexTokenizer_test.cpp
@@ -0,0 +1,234 @@
+/*
+ * $Source$
+ * $Revision$
+ * $Date$
+ *
+ * Copyright (c) 1999 by CyberSolutions GmbH.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by CyberSolutions GmbH.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "text.hpp"
+
+// begin test section
+// Set verbose to 1 if you want to see the parsed tokens
+const int verbose=0;
+
+// test1: split a string into runs of lower-case letters, each followed by a
+// run of anything else, using a caller-supplied pattern.
+// Returns 0 when the loop below executes exactly 5 times, 1 otherwise.
+int test1(){
+  string input("a22bbb4444ccccc999999999dfgDFG");
+
+  // Set up the tokenizer to match the input string against a regular
+  // expression.  Dereferencing the iterator yields the entire match,
+  // indexing it with 1, 2, 3, .. yields the subexpressions.
+  RegexTokenizer rt(input,"([a-z]*)([^a-z]*)");
+
+  RegexTokenizer::iterator next= rt.begin();
+  const RegexTokenizer::iterator last= rt.end();
+
+  if(verbose)
+    cout << "*** begin 1*** "<<endl;
+
+  // Walk the tokens.  The final pass, in which ++next reaches last, is
+  // counted as well.
+  int count=0;
+  for(; next!=last; count++){
+    ++next;// Preinc - processes input
+    if(next!=last && verbose)
+      cout << *next   // Entire match,
+	   <<"="
+	   <<next[1]  // first subexpression,
+	   <<"+"
+	   <<next[2]  // 2nd subexpr.
+	   << endl;
+  }
+
+  if(verbose)
+      cout << "--- end 1 ---"<<count<<endl;
+
+  // Any other number of passes is reported as a failure.
+  return (count != 5) ? 1 : 0;
+}
+
+// test2: split a multi-line string into words, each together with the
+// whitespace that follows it, using a caller-supplied pattern.
+// Returns 0 when the loop below executes exactly 8 times, 1 otherwise.
+int test2(){
+  string input("Word-Satz 1\n Satz 1a\nSatz 2\n\tSatz2a.");
+
+  // Set up the tokenizer to match the input string against a regular
+  // expression: subexpression 1 is a run of non-whitespace characters,
+  // subexpression 2 the run of blanks, tabs and newlines behind it.
+  RegexTokenizer rt(input,"([^ \t\n]*)([ \t\n]*)");
+
+  RegexTokenizer::iterator next= rt.begin();
+  const RegexTokenizer::iterator last= rt.end();
+
+  if(verbose)
+      cout << "*** begin 2*** "<<endl;
+
+  // Walk the tokens.  The final pass, in which ++next reaches last, is
+  // counted as well.
+  int count=0;
+  for(; next!=last; count++){
+    ++next;// Preinc - processes input
+    if(next!=last && verbose)
+      cout <<next[1] // first matched subexpression
+	   <<"["
+	   <<next[2] // second matched subexpression
+	   <<"]"<< endl;
+  }
+
+  if(verbose)
+      cout << "--- end 2 ---"<<count<<endl;
+
+  // Any other number of passes is reported as a failure.
+  return (count != 8) ? 1 : 0;
+}
+
+// testWord: exercise the predefined Word mode on a string that spans
+// three lines.
+// Returns 0 when the loop below executes exactly 12 times, 1 otherwise.
+int testWord(){
+  string input("Ein Satz aus vielen langen Wor-ten.\nUnd ein zweiter Satz.2\n3");
+
+  // Word mode selects the expression "([^ \t\n]*)([ \t\n]*)" that
+  // defines word-wise tokenizing.  Dereferencing the iterator yields the
+  // entire match, indexing it with 1, 2, 3, .. the subexpressions.
+  RegexTokenizer rt(input,RegexTokenizer::Word);
+
+  RegexTokenizer::iterator next= rt.begin();
+  const RegexTokenizer::iterator last= rt.end();
+
+  if(verbose)
+      cout << "*** begin Word*** "<<endl;
+
+  // Walk the tokens.  The final pass, in which ++next reaches last, is
+  // counted as well.
+  int count=0;
+  for(; next!=last; count++){
+    ++next;// Preinc - processes input
+    if(next!=last && verbose)
+      cout <<next[1]   // first matched subexpression,
+	   <<"["
+	   <<next[2]   // 2nd matched subexpr.
+	   <<"]"<< endl;
+  }
+
+  if(verbose)
+      cout << "--- end Word ---"<<count<<endl;
+
+  // Any other number of passes is reported as a failure.
+  return (count != 12) ? 1 : 0;
+}
+
+
+
+// testLine: exercise the predefined Line mode on a four-line string.
+// Returns 0 when the loop below executes exactly 5 times, 1 otherwise.
+int testLine(){
+  string input("Line-Satz 1\n Satz 1a\nSatz 2\n\tSatz2a.");
+
+  // Line mode selects the expression "^(.*)$\n" that defines line by
+  // line tokenizing.  Dereferencing the iterator yields the entire match,
+  // indexing it with 1, 2, 3, .. the subexpressions.
+  RegexTokenizer rt(input,RegexTokenizer::Line);
+
+  RegexTokenizer::iterator next= rt.begin();
+  const RegexTokenizer::iterator last= rt.end();
+
+  if(verbose)
+      cout << "*** begin Line*** "<<endl;
+
+  // Walk the tokens.  The final pass, in which ++next reaches last, is
+  // counted as well.
+  int count=0;
+  for(; next!=last; count++){
+    ++next;// Preinc - processes input
+    if(next!=last && verbose)
+      cout <<"'"
+	   <<next[1]   // first matched subexpression
+	   <<"'"<<"["
+	   <<next[2]   // second matched subexpression
+	   <<"]"<< endl;
+  }
+
+  if(verbose)
+      cout << "--- end Line ---"<<count<<endl;
+
+  // Any other number of passes is reported as a failure.
+  if(count == 5)
+    return 0;
+  return 1;
+}
+
+// testRFC: exercise the predefined RFC mode, in which a line that starts
+// with a blank or a tab continues the line before it.
+// Returns 0 when the loop below executes exactly 3 times, 1 otherwise.
+int testRFC(){
+  string input("RFC-Satz 1\n Satz 1a\nSatz 2\n\tSatz2a\n\tSatz2b.");
+
+  // RFC mode selects the expression "((^.*$)((\n)^[ \t]+.*$)*)(\n)?" .
+  // Bug: whitespace that glues one line to the next is not removed.
+  // (afaik, there is no way to do this with a single regular expression).
+  // Dereferencing the iterator yields the entire match, indexing it with
+  // 1, 2, 3, .. the subexpressions.
+  RegexTokenizer rt(input,RegexTokenizer::RFC);
+
+  RegexTokenizer::iterator next= rt.begin();
+  const RegexTokenizer::iterator last= rt.end();
+
+  if(verbose)
+      cout << "*** begin RFC*** "<<endl;
+
+  // Walk the tokens.  The final pass, in which ++next reaches last, is
+  // counted as well.
+  int count=0;
+  for(; next!=last; count++){
+    ++next;// Preinc - processes input
+    // first, second and third matched subexpression
+    if(next!=last && verbose)
+      cout <<"'"<<next[1]<<"'"<<"["<<next[2]<<"]"<<"["<<next[3]<<"]"<< endl;
+  }
+
+  if(verbose)
+      cout << "--- end RFC ---"<<count<<endl;
+
+  // Any other number of passes is reported as a failure.
+  return (count != 3) ? 1 : 0;
+}
+
+int main(int argc, char *argv[]){  // exit status 0 only if every test passes
+  if(test1() || test2() || testWord()) return 1;  // the first failure skips the rest
+  return (testLine() || testRFC()) ? 1 : 0;
+}
diff --git a/libtext/SMakefile b/libtext/SMakefile
new file mode 100644
index 0000000..c0584f2
--- /dev/null
+++ b/libtext/SMakefile
@@ -0,0 +1,13 @@
+#
+# Skeleton Makefile -- process with smake to create real Makefile.
+#
+# $Header$
+#
+.opt -o GNUmakefile.in
+# NOTE(review): SRCS lists only the C sources; the C++ files in this directory (RegexTokenizer.cpp, TokenIterator.cpp) are not listed -- confirm that is intended.
+LIBRARY		= text
+SRCS		= easy_pattern_match.c find_next_line.c find_string.c easy_sprintf.c \
+	          transform_text.c wordwrap.c
+
+.include <library.smk>
+.include <autoconf.base.smk>
diff --git a/libtext/TokenIterator.cpp b/libtext/TokenIterator.cpp
new file mode 100644
index 0000000..4b84984
--- /dev/null
+++ b/libtext/TokenIterator.cpp
@@ -0,0 +1,380 @@
+/*
+ * $Source$
+ * $Revision$
+ * $Date$
+ *
+ * Copyright (c) 1999 by CyberSolutions GmbH, Germany.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by CyberSolutions GmbH.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <cstdlib>
+#include <cstring>
+#include <strstream>
+
+#include "text.hpp"
+
+/*
+ * Return the index of the first occurrence of s in the NUL-terminated
+ * string c, or -1 when s does not occur.  The scan stops at the
+ * terminator without testing it, so looking up '\0' always yields -1.
+ */
+static int mystrpos(const char *c,char s){
+  for(int at=0; c[at]!='\0'; ++at){
+    if(c[at]==s)
+      return at;
+  }
+
+  /* Reached the terminator without a hit. */
+  return -1;
+}
+// Placeholder that reset() stores in `buffer`; the non-const operator*() and operator string() read a first token while the buffer still equals it.
+string TokenIterator::mooncheese= string("The Moon is A green cheese (sheesh!).");
+// Return every scanner field to its start value; leaves the object without a stream.
+void TokenIterator::reset(){
+  i= (istream*)0;                   // no input attached
+  brace= 0;  bracestack[0]='\0';    // empty brace stack; pushes go to index 1 and up
+  braceoftoken= 0;
+  thesep= '\0'; previoussep= '\0';  // no separator seen so far
+  eoltoken= '\n';                   // value put into thesep at a line end
+  whitetoken= ' ';                  // value put into thesep at whitespace
+  buffer= mooncheese;               // placeholder: no token has been read yet
+}
+// Select tokenizing mode m and load the matching character tables
+// (whitespace, separators, continuation characters, brace pairs).
+void TokenIterator::setMode(Mode m){
+  mode= m;
+  // Every mode uses the backslash as escape character.
+  escapechar= '\\';
+  switch(mode){
+  case Word:   // blank and tab split tokens; '"' is a brace when braces are enabled
+    whitespace= " \t";
+    separator= "";
+    continuation= "";
+    leftbrace= "\"";
+    rightbrace= "\"";
+    break;
+  case Line:   // no whitespace, separator or brace characters at all
+    whitespace= "";
+    separator= "";
+    continuation= "";
+    leftbrace= "";
+    rightbrace= "";
+    break;
+  case RFC:    // as Line, plus blank and tab as continuation characters
+    whitespace= "";
+    separator= "";
+    continuation= " \t";
+    leftbrace= "";
+    rightbrace= "";
+    break;
+  default:     // punctuation separators and four bracket pairs besides '"'
+    whitespace= " \t";
+    separator= ",;:+-=/\\@";
+    continuation= "";
+    leftbrace= "\"([{<";
+    rightbrace= "\")]}>";
+    break;
+  }
+}
+// Default constructor: no stream attached (reset() nulls `i`), brace handling off, Word mode.
+TokenIterator::TokenIterator(){
+  reset();
+  braces= false;
+  setMode(Word);
+  ismyistream= false;  // nothing for the destructor to delete
+}
+
+// Shared stream-less instance; begin() returns it when no readable
+// stream is left (presumably end() does as well -- confirm in text.hpp).
+TokenIterator TokenIterator::finalIterator = TokenIterator();
+// Tokenize the text s in mode m; b enables brace handling.  The text is
+// copied into a stream owned by this object (ismyistream), because s is a
+// by-value parameter that dies when the constructor returns: an
+// istrstream over s.c_str() would keep reading the freed buffer.
+TokenIterator::TokenIterator(string s, Mode m, bool b){
+  reset();  braces= b;  setMode(m);
+  strstream *owned= new strstream();     // dynamic buffer, freed with the stream
+  owned->write(s.data(), s.length());
+  i= owned;  ismyistream= true;          // first token is read lazily, not here
+}
+// Tokenize the caller's stream `is` in mode m; b enables brace handling.
+// (Original note: the intended defaults are m=Word, b=false.)
+TokenIterator::TokenIterator(istream &is, Mode m, bool b){
+  reset();
+  braces= b;
+  setMode(m);
+  ismyistream= false;  // borrowed stream: the destructor will not delete it
+  i= &is;              // only the address is kept, so `is` must outlive this object
+  // The first token is deliberately not read here ("makes this unwieldy").
+}
+// Delete the stream only when this object allocated it (ismyistream is set by the string constructor).
+TokenIterator::~TokenIterator(){
+  if(ismyistream)
+    delete i;
+}
+// Start-of-sequence accessor: hands back this very object while its stream
+// is attached, good() and not at eof(); otherwise the shared finalIterator.
+TokenIterator::iterator& TokenIterator::begin() const
+{
+    const bool exhausted= !i || !i->good() || i->eof();
+    if( exhausted )
+	return finalIterator;
+    return *const_cast<TokenIterator*> (this);
+}
+
+//! Conversion to string, const flavour.
+//! Returns a copy of the token buffer as it stands; nothing is read, so
+//! before the first read the result is the placeholder text set by reset().
+TokenIterator::operator string() const
+{
+    return buffer; 
+};
+
+//! Conversion to string, non-const flavour.
+//! While the buffer still equals the placeholder text, one token is read
+//! first; afterwards a copy of the buffer is returned.
+TokenIterator::operator string()
+{
+    if( buffer== mooncheese )
+	(*this)();  
+    return buffer; 
+};
+
+//! from Input Iterator
+//! Dereference, const flavour: returns a copy of the token buffer as it
+//! stands and never reads from the stream.
+const string TokenIterator::operator*() const
+{
+    return buffer; 
+};
+
+//! from Input Iterator
+//! Dereference, non-const flavour: reads one token first while the buffer
+//! still equals the placeholder text, then returns a copy of the buffer.
+const string TokenIterator::operator*()
+{
+    if( buffer== mooncheese )
+	(*this)();  
+    return buffer; 
+};
+
+//! from Input Iterator
+//! Preincrement: reads the next token into the buffer (via operator())
+//! and returns this iterator.
+TokenIterator& TokenIterator::operator++()
+{ 
+    (*this)(); return *this;    
+};
+//! from Input Iterator
+//! Postincrement.  Advances to the next token and returns *this.
+//! The return type is a reference, so the previous value cannot be
+//! handed back by value.  The former body returned a function-static
+//! copy made on the very first call (stale for every later call and,
+//! presumably being a memberwise copy, a second owner of the stream)
+//! and never advanced: the dummy argument of `it++` is 0, so its
+//! counting loop did not run.
+TokenIterator& TokenIterator::operator++(int)
+{
+    (*this)();
+    return *this;
+}
+
+/** Inequality by object identity: true unless R is this very object.
+    Neither the token text nor the stream state is compared. */
+bool TokenIterator::operator != (TokenIterator &R) const{// const & I say, const
+    // Author's note: a `const TokenIterator &R` parameter was avoided for
+    // fear of receiving a copy of R, which an address comparison cannot
+    // cope with.  Open item: compare by end flags instead.
+	
+    return &R!= this;
+}
+
+/** Equality, defined as the negation of operator!= on the same argument. */
+bool TokenIterator::operator == (TokenIterator &R) const{
+    // Author's note: the argument is a non-const reference because a
+    // const reference was feared to yield a copy of R.  Open item:
+    // compare by end flags instead.
+
+    return !( *this != R );
+}
+/** True when no stream is attached, or the stream is not good(), or it
+    is at eof().  (Original note: needed for the foreach template.) */
+bool TokenIterator::operator ! (void) const{
+    return !( i && i->good() && !i->eof() );
+}
+/** True while a stream is attached, good() and not at eof().  Only the
+    stream state is tested; no token is read or inspected here. */
+bool TokenIterator::hastoken(void) const{
+     return  i && i->good() && !i->eof();
+}
+
+/*
+ * Line-break recognizer used by the tokenizer.
+ *
+ * c is a character that has just been read from *i.  If c is neither
+ * '\r' nor '\n' the stream is left untouched and false is returned.
+ * Otherwise one more character is fetched: when it is the other half
+ * of a two-character break ("\r\n" or "\n\r") it stays consumed, any
+ * other character is pushed back with unget().  Returns true in both
+ * of these cases, and also when nothing further could be read.
+ */
+inline bool linefeed(char c, istream *i){
+  const bool cr= (c=='\r');
+  const bool lf= (c=='\n');
+  if( !cr && !lf )
+    return false;
+
+  // The character that would complete a two-character line break.
+  const char partner= cr ? '\n' : '\r';
+
+  char follow;
+  if( i->get(follow) && follow!=partner )
+    i->unget();
+  return true;
+}
+/** Set membership that, unlike a bare strchr(), never matches '\0'. */
+static inline bool inCharSet(const char *set, char ch){
+  return ch!='\0' && strchr(set,ch)!=0;
+}
+
+/**
+ * Read the next token from the stream into `buffer` and return a copy.
+ * `previoussep` starts out as the delimiter left by the previous call and
+ * `thesep` receives the delimiter that ends this token.  At end of input
+ * whatever has been collected so far (possibly nothing) is returned; the
+ * same happens at once when no stream is attached.
+ *
+ * A '\0' byte in the input is treated as ordinary token text: it is
+ * never taken for whitespace, a separator or a continuation character.
+ */
+string TokenIterator::operator()(){
+  char c= 0;
+  int pos;
+
+  previoussep= thesep;
+  buffer= string("");
+  if(!i)                       // stream-less iterator, e.g. finalIterator
+    return buffer;
+
+  while( i->get(c) ){
+    if(c==escapechar){
+      // Escaped character: appended as-is, never acts as a delimiter.
+      char d;// special translations need to be plugged in here
+      if( i->get(d) ){
+	if( brace && linefeed(d,i) )
+	  buffer+= '\n';         // escaped line break inside braces
+	else
+	  buffer+= d;
+      }
+    }
+    else if( linefeed(c,i) ){
+      thesep= eoltoken;
+      switch(mode){
+      case Word:
+	if( previoussep!=whitetoken || buffer.length() )// space" = "
+	  return buffer;
+	break;
+      case Line:
+	return buffer;
+      case RFC:
+	{
+	  char d;
+	  // Peek at the first character of the next line: anything but a
+	  // continuation character ends the token here.
+	  if( i->get(d) ){
+	    i->unget();
+	    if( !inCharSet(continuation,d) )
+	      return buffer;
+	  }
+	  // Skip the run of continuation characters (or stop at end of input).
+	  do{
+	    if(!i->get(d)){ return buffer; }
+	  }while( inCharSet(continuation,d) );
+	  //should "A\n \tB" be returned as one token "AB" or as "A B" ?
+	  // currently, "AB" is returned
+	  i->unget();// give back the first character of the continued text
+	}
+	break;
+      default:
+	return buffer;
+      }
+    }
+    else if( !(brace) && inCharSet(whitespace,c) ){ // brace>0 implies braces==true
+      if(buffer.length()){
+	thesep= whitetoken;
+	return buffer;// send token
+      }else{
+	previoussep= whitetoken;// leading blank: skip it, but remember it
+      }
+    }else if( inCharSet(separator,c) ){
+      thesep= c;
+      if( previoussep!=whitetoken || buffer.length() )// space" = "
+	return buffer;// send token
+    }else if(brace>0 && bracestack[brace]==c){
+      /* closing brace */
+      braceoftoken= brace;
+      brace--; /* pop stack of braces */
+      thesep= c;
+      return buffer;// send token
+    }else if( braces && (pos=mystrpos(leftbrace,c), pos>=0) ){
+      /* opening brace */
+      // NOTE(review): nothing bounds `brace` against the capacity of
+      // bracestack (declared outside this file) -- deep nesting may overflow.
+      braceoftoken= brace;
+      bracestack[++brace]= rightbrace[pos];
+      if( previoussep!=whitetoken || buffer.length() ){// space" = "
+	thesep= c;
+	return buffer;// send token
+      }
+    }else{
+      /* normal, append to token */
+      buffer+= c;
+    }
+  }
+  return buffer;
+}
+
+// Wrap the tokenizer Tbase; only the pointer is stored (no copy is made here).
+LexxStyleTokenIterator::LexxStyleTokenIterator(TokenIterator *Tbase){
+  state=0;      // the first operator() call flips this and delivers a token string
+  base= Tbase;
+}
+// Alternates per call: odd calls fetch the next token text from the base tokenizer, even calls report its current thesep.
+LexxStyleToken&  LexxStyleTokenIterator::operator()(){
+  state= !state;
+  thetoken.ttype= (LexxStyleToken::Tokentype)state;
+  if(!state){
+    thetoken.Tchar= base->thesep;
+    return thetoken;
+  }
+  thetoken.Tstring= (*base)();
+  return thetoken;
+}
diff --git a/libtext/TokenIterator_test.cpp b/libtext/TokenIterator_test.cpp
new file mode 100644
index 0000000..a4448b9
--- /dev/null
+++ b/libtext/TokenIterator_test.cpp
@@ -0,0 +1,222 @@
+/*
+ * $Source$
+ * $Revision$
+ * $Date$
+ *
+ * Copyright (c) 1999 by CyberSolutions GmbH.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by CyberSolutions GmbH.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <cstdlib>
+#include <fstream.h>
+#include <string>
+#include "text.hpp"
+#include <algo.h>
+
+
+// Set verbose=1 to see the tokens
+const int verbose=1;
+
+/**
+   Function object meant to be handed to the for_each template:
+   operator() prints the current token of the TokenIterator it is
+   given to cout, followed by a line break.
+ */
+class do_sth_with_aTokenIterator {
+  string s;   // not used by any member; kept so the class layout is unchanged
+public:
+  explicit do_sth_with_aTokenIterator() : s() {}
+
+  //! Print the iterator's current token (const operator*: no stream read).
+  void operator()(const TokenIterator& it)
+    { cout<< *it <<endl; }
+
+  //! Postincrement.  A no-op, but it has to return something: flowing
+  //! off the end of a function with a non-void return type is undefined.
+  do_sth_with_aTokenIterator& operator++(int)
+    { return *this; }
+};
+// Runs TokenIterator over test.txt in Word, quoted-Word, Line and RFC mode. Returns 2 or 255 when test.txt cannot be opened, else 0; the count checks are disabled.
+int main(int argc,char *argv[]){
+  {
+    //Tokenize words
+    ifstream i("test.txt");
+    if(!i)
+      { cerr<<"Test Data not found(test.txt)"<< endl; return(2); }
+
+    //Initialize the Tokenizer for word mode
+    TokenIterator tokenize(i,TokenIterator::Word);
+
+    string token="";
+    int count=0;
+    if(verbose)
+      cout<<endl<<"--Word"<<endl;
+
+    //Loop over all tokens
+    while( tokenize.hastoken() ){
+	token= tokenize();
+	if(verbose)
+	  cout<<":"<<token<<"\n";
+	count ++;
+    }
+    if(verbose)
+      cout<<endl<<count<<endl;
+    if(count!=27)
+      ;//return(1);  -- check disabled: a wrong count is ignored
+  }
+  {
+      //Tokenize words, with " "
+      ifstream i("test.txt");
+      if(!i) return 255;
+
+      //Initialize the Tokenizer for word mode, "a b" is one word
+      TokenIterator tokenize(i,TokenIterator::Word,true);
+
+      string token="";
+      int count=0;
+      if(verbose)
+	cout<<endl<<"--\"Word\""<<endl;
+
+      //Loop over all tokens
+      while( tokenize.hastoken() ){
+	  token= tokenize();
+	  if(verbose)
+	    cout<<":"<<token<<"\n";
+	  count ++;
+      }
+      if(verbose)
+	cout<<endl<<count<<endl;
+      if(count!=25)
+	;//return(1);  -- check disabled: a wrong count is ignored
+  }
+  {
+      //Tokenize lines
+      ifstream i("test.txt");
+      if(!i) return 255;
+
+      //Initialize the Tokenizer for line mode ( one line == one token )
+      TokenIterator tokenize(i,TokenIterator::Line);
+
+      string token="";
+      int count=0;
+      if(verbose)
+	cout<<endl<<"--Line"<<endl;
+
+      while( tokenize.hastoken() ){
+	  token= tokenize();
+	  if(verbose)
+	    cout<<":"<<token<<"\n";
+	  count ++;
+      }
+      if(verbose)
+	cout<<endl<<count<<endl;
+      if(count!=10)
+	;//return(1);  -- check disabled: a wrong count is ignored
+  }
+  {
+      //Tokenize 'RFC-style'
+      ifstream i("test.txt");
+      if(!i) return 255;
+
+      //Initialize Tokenizer for RFC mode
+      // ( If the following line starts with space or tabulator,
+      // it is glued to the previous line ).
+      TokenIterator tokenize(i,TokenIterator::RFC);
+      string token="";
+      int count=0;
+      if(verbose)
+	cout<<endl<<"--RFC"<<endl;
+
+      //Loop over all tokens
+      while( tokenize.hastoken() ){
+	  token= tokenize();
+	  if(verbose)
+	    cout<<":"<<token<<"\n";
+	  count ++;
+      }
+      if(verbose)
+	cout<<endl<<count<<endl;
+      if(count!=5)
+	;//return(1);  -- check disabled: a wrong count is ignored
+  }
+
+  //trying sequence capability ..
+  {
+      //Tokenize words
+      ifstream i("test.txt");
+      if(!i) return 255;
+      TokenIterator tokenize(i,TokenIterator::Word);
+      string token="";
+      int count=0;
+      if(verbose)
+	cout<<endl<<"--Word(seq)"<<endl;
+
+      //TokenIterator has only dummy capabilities, hence the warnings
+      // or maybe I just don't get what a unary function is ?
+      /*      for_each (
+		tokenize.begin(),
+ 		tokenize.end(),
+		do_sth_with_aTokenIterator()
+		)
+	  ++count;
+      */
+
+      if(verbose)
+	cout<<endl<<count<<endl;
+
+      if(count!=0)
+	cerr<<"You mean someone repaired that ?!"<<endl;
+  }
+
+  // there are different ways to use the TokenIterator;
+  // this way seems intuitive to me.
+  // testing this way ..
+    {
+      
+      ifstream i("test.txt");
+      if(!i) return 255;
+      TokenIterator lines(i, TokenIterator::Line);
+
+      cout<<endl;      
+      cout<<"testing while( line= ++lines, ( lines.begin() != lines.end() ) )"
+	  <<"\n            cout<<\"<<line<<\"<<endl; "<<endl;
+      
+ 
+      string line;
+      while( line= ++lines, ( lines.begin() != lines.end() ) )
+	  cout<<"\""<<line<<"\""<<endl; 
+   
+    }
+ 
+
+  return 0;
+}
diff --git a/libtext/configure.in b/libtext/configure.in
new file mode 100644
index 0000000..66cf5e0
--- /dev/null
+++ b/libtext/configure.in
@@ -0,0 +1,28 @@
+dnl
+dnl configure.in -- Process this file with autoconf to produce a configure script.
+dnl
+dnl $Header$
+dnl text.h is the file named to AC_INIT as the marker of the source directory.
+
+AC_INIT(text.h)
+
+dnl Checks for paths and programs.
+dnl Only a C compiler is probed; no C++ compiler check appears in this file.
+AC_PROG_CC
+AC_PROG_RANLIB
+
+dnl Enable warning flags for gcc.
+dnl
+if test "$GCC" = yes; then
+    CFLAGS="$CFLAGS -Wall -pedantic"
+    CXXFLAGS="$CXXFLAGS -Wall -pedantic"
+fi
+
+dnl Remove '-g' and '-O2' from the compile flags.
+dnl NOTE(review): each sed expression deletes only the first matching substring, so a flag such as -ggdb would be cut down to gdb.
+CFLAGS=`echo $CFLAGS | sed -e "s/-g//" -e "s/-O2//"`
+CXXFLAGS=`echo $CXXFLAGS | sed -e "s/-g//" -e "s/-O2//"`
+
+dnl Write results.
+dnl
+AC_OUTPUT(GNUmakefile)
diff --git a/libtext/easy_pattern_match.c b/libtext/easy_pattern_match.c
index 8dc219e..1180950 100644
--- a/libtext/easy_pattern_match.c
+++ b/libtext/easy_pattern_match.c
@@ -1,10 +1,39 @@
 /*
- *      $Source$
- *      $Revision$
- *      $Date$
+ * $Source$
+ * $Revision$
+ * $Date$
  *
- *      Copyright (C) 1996 by CyberSolutions GmbH.
- *      All rights reserved.
+ * Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Peter Simons.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/types.h>
diff --git a/libtext/easy_sprintf.c b/libtext/easy_sprintf.c
index 5a83a7f..2027862 100644
--- a/libtext/easy_sprintf.c
+++ b/libtext/easy_sprintf.c
@@ -1,10 +1,39 @@
 /*
- *      $Source$
- *      $Revision$
- *      $Date$
+ * $Source$
+ * $Revision$
+ * $Date$
  *
- *      Copyright (C) 1996,97 by CyberSolutions GmbH.
- *      All rights reserved.
+ * Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Peter Simons.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <stdarg.h>
diff --git a/libtext/find_next_line.c b/libtext/find_next_line.c
index 9a25659..5ef4c59 100644
--- a/libtext/find_next_line.c
+++ b/libtext/find_next_line.c
@@ -1,10 +1,39 @@
 /*
- *      $Source$
- *      $Revision$
- *      $Date$
+ * $Source$
+ * $Revision$
+ * $Date$
  *
- *      Copyright (C) 1996,97 by CyberSolutions GmbH.
- *      All rights reserved.
+ * Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Peter Simons.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include "text.h"
diff --git a/libtext/find_string.c b/libtext/find_string.c
index 01db929..887dc2f 100644
--- a/libtext/find_string.c
+++ b/libtext/find_string.c
@@ -1,10 +1,39 @@
 /*
- *      $Source$
- *      $Revision$
- *      $Date$
+ * $Source$
+ * $Revision$
+ * $Date$
  *
- *      Copyright (C) 1996 by CyberSolutions GmbH.
- *      All rights reserved.
+ * Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Peter Simons.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <string.h>
diff --git a/libtext/test.txt b/libtext/test.txt
new file mode 100644
index 0000000..31f0c64
--- /dev/null
+++ b/libtext/test.txt
@@ -0,0 +1,14 @@
+Ein Wort ist kein Satz,
+ aber ein Satz ist kein Ersatz für ein Wort.
+	 "Das Wort" ist der "Ein Satz" im "Vor Wort".
+Dagegen kommt das "Vor Spiel" vor dem Spiel,
+ obwohl das eigentliche "Kommen" im Spiel stattfindet.
+So gesehen, ist das "Vor Spiel" ein richtiges Spiel,
+ während das sogenannte Spiel der "Volle Ernst" ist.
+Andererseits, wenn Ernst voll ist, findet meist weder "Vor Spiel" noch ein Spiel statt.
+Dieses Spiel hatte dann schon vorher stattgefunden; 
+  Meistens war es dann ein Lokaltermin des "Fußball Clubs Bayern".
+Darauf folgte ein Termin im Lokal; Nach dem Fußballspiel zog Ernst das Lokal
+ der "Oben ohne Bar" vor;
+Denn in der Bar ging nichts ohne, weder ohne das eine, noch ohne unten. 
+Des weiteren hatte ein Besuch in der Bar für Ernst meist ein "Nach Spiel" zu Hause. 
diff --git a/libtext/text.h b/libtext/text.h
index e64a2da..1c05819 100644
--- a/libtext/text.h
+++ b/libtext/text.h
@@ -1,10 +1,39 @@
 /*
- *      $Source$
- *      $Revision$
- *      $Date$
+ * $Source$
+ * $Revision$
+ * $Date$
  *
- *      Copyright (C) 1996,97 by CyberSolutions GmbH.
- *      All rights reserved.
+ * Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Peter Simons.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef __LIB_TEXT_H__
@@ -19,17 +48,19 @@
 
 /********** Useful defines and declarations **********/
 
+#ifndef __cplusplus
 #ifndef __HAVE_DEFINED_BOOL__
 #  define __HAVE_DEFINED_BOOL__ 1
 typedef int bool;
 #endif
+
 #ifndef FALSE
 #  define FALSE (0==1)
 #endif
 #ifndef TRUE
 #  define TRUE (1==1)
 #endif
-
+#endif
 enum {
     TEXT_REGEX_OK = 0,
     TEXT_REGEX_ERROR,
diff --git a/libtext/text.hpp b/libtext/text.hpp
new file mode 100644
index 0000000..4e86c90
--- /dev/null
+++ b/libtext/text.hpp
@@ -0,0 +1,539 @@
+/*
+ * $Source$
+ * $Revision$
+ * $Date$
+ *
+ * Copyright (c) 1999 by CyberSolutions GmbH.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by CyberSolutions GmbH.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __LIB_TEXT_HPP__
+#define __LIB_TEXT_HPP__
+
+#include <stdexcept>
+#include <string>
+#include <cstring>
+#include <iterator>
+#include <list>
+#include <sys/types.h>
+#include "../RegExp/RegExp.hpp"
+
+/** \file text.hpp
+
+    A library for text parsing and manipulation.
+
+    This library contains a couple of useful functions for dealing
+    with strings, most notably a regular expression class and a
+    generic config file parser.
+*/
+
+//////////////////////////////////////////////////
+//                tokenize()                    //
+//////////////////////////////////////////////////
+// Split 'buffer' at any run of characters from 'sep' and write each
+// non-empty token through the insert iterator 'ii', in input order.
+template<class T>
+void tokenize(insert_iterator<T> & ii, const string & buffer,
+	      const char * sep = " \t\r\n")
+{
+    string::size_type start = 0;
+    do {
+	// 'stop' is the first separator at or after 'start' (npos if none).
+	const string::size_type stop = buffer.find_first_of(sep, start);
+	const string word(buffer, start, stop-start);
+	if (!word.empty()) {
+	    *ii = word;
+	    ++ii;
+	}
+	// Skip the run of separators; npos here terminates the loop.
+	start = (stop==string::npos) ? stop : buffer.find_first_not_of(sep, stop);
+    } while(start != string::npos);
+}
+
+//////////////////////////////////////////////////
+//               RegexTokenizer()               //
+//////////////////////////////////////////////////
+
+
+/** The RegexTokenizer extracts tokens from 'string' input.
+
+    string or stream input has to be converted to string. This means
+    the Tokenizer should be useful with large input which is divided
+    into large chunks. A match is performed against a list of regular
+    expressions. Each expression defines a match-separator pair.
+    Regular Expressions are compiled with REG_EXTENDED flag.
+*/
+
+class RegexTokenizer: forward_iterator<RegexTokenizer, int> {
+public:
+
+    /** maximum number of registers, subexpressions */
+    static const int N_pm=10;
+
+    /** maximum length of a match */
+    static const int N_substring=1024;
+
+    /** the workspace: one static buffer of N_substring characters plus the \0 */
+    static char workspace[N_substring+1]; //+1 for trailing \0
+
+    /** Modes (other than Custom) make the \a RegexTokenizer use a standard regular expression.
+
+	\a Custom : The tokenizer uses the regular expression you specify.
+
+	\a Word   : The tokenizer gives chunks of input separated by space and tabs.
+
+	\a Line   : The tokenizer splits input at end of line.
+
+	\a RFC    : The tokenizer splits input at end of line.
+	         Lines may be continued by starting a new line with spaces or tabs.
+		 These continuation characters are NOT stripped from the tokens.
+
+     */
+    enum Mode {Custom, Word, Line, RFC};
+
+    /** RegexTokenizer is its own iterator. */
+    typedef RegexTokenizer iterator;
+private:
+    string input;
+    string result[N_pm];
+    list<const char*>regex_src;// the source regexes needed for copy/begin/end
+    list<regex_t>regex;        // not sure multiple regexes are a smart idea
+    int whichregexwasmatched;
+    regmatch_t pm[N_pm];
+    int I_pm;                  // matched subexpressions
+    int error;                 // result of regex calls
+    int so,eo,previous_eo;     // positions
+    //int matchMask;//bitset; which fields to return by the * operator
+protected:
+    Mode mode;
+    void advance();
+    void reset();
+    int set(string _input,list<const char*> _regex);
+public:
+    /** default constructor. */
+    RegexTokenizer();
+
+    /** Tokenize a string in a mode. */
+    RegexTokenizer(string _input,Mode _mode);
+
+    /**  Tokenize a string according to a single regular expression. */
+    RegexTokenizer(string _input,const char* oneregex);
+
+    /** Tokenize a string according to several regular expressions.
+	(If the first regular expression fails, the next one will be tried. )
+     */
+    RegexTokenizer(string _input,list<const char*> _regex);
+
+    /** copy constructor */
+    RegexTokenizer(const RegexTokenizer &r);
+
+    //void selectFields(int m){ matchMask= m; }
+
+    /** The begin state */
+    RegexTokenizer begin() const;
+
+    /** The end state */
+    RegexTokenizer end() const;
+
+    /**
+	from Input Iterator
+        Returns the current token (a copy of result[0]).
+    */
+    const string operator*() const
+    { return result[0]; };
+
+
+    /** from Input Iterator
+        Returns the i-th matched subexpression (a copy of result[i]).
+        \a i is not range-checked; result has N_pm elements.
+    */
+    const string operator[](int i) const
+    { return result[i]; };
+
+    /** from Input Iterator
+	PreIncrement: advances once and returns *this.
+     */
+    RegexTokenizer& operator++()
+    { (*this).advance(); return *this; };
+
+    /** PostIncrement form: advances \a i times and returns *this.
+	NOTE(review): a plain `it++` passes i==0, so it does not advance.
+     */
+    RegexTokenizer& operator++(int i)
+    { while(i>0){ (*this).advance(); --i; }; return *this; };
+
+    /** Destructor */
+    virtual ~RegexTokenizer();
+
+    /** compare not equal: only the positions so, eo, previous_eo are compared */
+    bool operator != (const RegexTokenizer &R) const{// const & I say, const
+	return  so != R.so || eo != R.eo || previous_eo != R.previous_eo;
+    }
+
+    /** compare two RegexTokenizers; the exact negation of operator != */
+    bool operator == (const RegexTokenizer &R) const{
+	return !( *this != R );
+    }
+
+    /** print the current state of the RegexTokenizer */
+    friend ostream& operator<<(ostream &o,const RegexTokenizer &r);
+};
+
+
+//////////////////////////////////////////////////
+//                 TokenIterator                //
+//////////////////////////////////////////////////
+
+
+/** The TokenIterator extracts tokens from string or stream input.
+
+    There are four main modes and a custom mode. In all modes, the
+    backslash works as an escape character for the next character i.e.
+    'one\\\\backslash' is read as 'one\backslash'.
+
+    Description of the main modes:
+
+    1. Words separated by whitespace, with "whitespace" consisting of
+    tabulators and the blank.
+    \code
+    TokenIterator tokenize(inputStr,TokenIterator::Word);
+    \endcode
+
+    2. Words separated by whitespace, "one word" is one token.
+    whitespace is defined to be only tabulators and the blank.
+    \code
+    TokenIterator tokenize(inputStr,TokenIterator::Word,true);
+    \endcode
+
+    3. Each line is a token.
+    Escaped newlines will become part of the token.
+    example:
+    \code
+    TokenIterator tokenize(inputStr,TokenIterator::Line);
+    \endcode
+
+    4. RFC style:
+    Whitespace at start of next line appends next line.
+    The use of escaping the newline to append the next line,
+    like in Makefiles, is NOT part of this mode.
+    example:
+    \code
+    TokenIterator tokenize(inputStr,TokenIterator::RFC);
+    \endcode
+
+    5. The Custom Mode: The custom mode is intended for reading from
+    data that is in almost human-readable-format, like /etc/passwd.
+    Separating elements are not returned as Tokens, but are stored in
+    thesep and previoussep. In /etc/passwd ':' is the separator,
+    while newlines separate records.
+    \code
+    class MyCustomTokenIterator: public TokenIterator{
+        public:
+
+        MyCustomTokenIterator(string inputStr, bool b=false)
+	    : TokenIterator(inputStr,TokenIterator::Custom, b){
+	    eoltoken= '\n';
+	    separator= ":\n";
+        };
+
+        MyCustomTokenIterator(istream &inputStr, bool b=false)
+	    : TokenIterator(inputStr,TokenIterator::Custom, b){
+	    eoltoken= '\n';
+	    separator= ":\n";
+        };
+   \endcode
+   See \a CustomTokenIterator.cpp for the full example.
+
+    Bugs (Custom Mode): A separator preceded by whitespace is not
+    recognized. Instead, the tokenizer collapses the run of whitespace
+    and reports it as the separator in thesep.
+    This is probably not what you want.
+*/
+
+
+class TokenIterator:istream_iterator<string,int> {
+
+private:
+    istream *i;
+    bool ismyistream;
+    string buffer;
+
+    static TokenIterator finalIterator; // the object returned by end()
+
+    static string mooncheese;
+
+public:
+    /** \relates TokenIterator
+	The modes allowed as arguments.
+     */
+    enum Mode {Word, Line, RFC, Custom};
+
+    typedef TokenIterator iterator;
+
+protected:
+    int brace;
+    int braceoftoken;
+    string bracestack;
+    bool braces;
+    Mode mode;
+
+    const char *whitespace;  // ALL whitespace must be listed here
+    const char *separator;   // separators
+    const char *continuation;// lists continuation
+    const char *leftbrace;   // leftbrace[i] matches rightbrace[i]
+    const char *rightbrace;  // supports multiple levels of braces
+    char escapechar;// escapechar is the escape char; default \ .
+    char eoltoken;  // use this instead of end of line
+    char whitetoken;// use this instead of whitespace
+
+    void setMode(Mode m);
+    void reset();
+
+public:
+    /**
+      Returns one token each call.
+      An empty token does NOT signal the end of the input.
+    */
+    virtual string operator()();
+
+    /** Dummy constructor */
+    /** constructs an Iterator that has reached end */
+    TokenIterator();
+
+    /** Constructor used to tokenize a string s,
+	using \a Mode m (default is Word),
+	by default without braces.
+    */
+    TokenIterator(string s, Mode m=Word, bool braces=false);
+
+    /** Constructor used to tokenize from an input stream,
+	using \a Mode m (default is Word),
+	by default without braces.
+
+	The input stream is consumed, which is why
+	the TokenIterator doesn't offer backward iterator capabilities.
+    */
+    TokenIterator(istream &is, Mode m=Word, bool braces=false);
+
+
+    /** A begin function returning an iterator.
+	\a begin and \a end functions have been crafted to
+	work with this way of using iterators:
+	\code
+	ifstream is(somefilename);
+	TokenIterator tokenize(is);
+
+	while( tokenize.begin() != tokenize.end() ){
+	    string token= tokenize();
+	    ...
+	}
+	\endcode
+     */
+    iterator& begin() const;
+
+
+    /** An end function returning the static finalIterator. See \a begin .
+     */
+    inline iterator& end() const{ return finalIterator; };
+
+
+    virtual ~TokenIterator();
+
+    //! from Input Iterator
+    //! Returns the current object in the stream.
+    operator string() const;
+
+
+    //! from Input Iterator
+    //! Returns the current object in the stream,
+    //! and the next object if the stream hasn't been read yet
+    operator string();
+
+
+    //! from Input Iterator
+    //! Returns the current object in the stream.
+    const string operator*() const;
+
+
+    //! from Input Iterator
+    //! Returns the current object in the stream,
+    //! and the next object if the stream hasn't been read yet
+    const string operator*();
+
+
+    //! from Input Iterator
+    //! Preincrement.
+    TokenIterator& operator++();
+
+    //! from Input Iterator
+    //! Postincrement.
+    //! this works .. almost
+    TokenIterator& operator++(int i);
+
+
+    /** compare not equal */
+    bool operator != (TokenIterator &R) const;
+
+
+    /** compare two Tokenizers */
+    bool operator == (TokenIterator &R) const;
+
+
+    /** need this for foreach template */
+    bool operator ! (void) const;
+
+
+    /** Introducing an implicit conversion to bool is not */
+    /** good because it creates an ambiguity, */
+    /** since bool may be converted implicitly to int and String. */
+    bool hastoken (void) const;
+
+
+
+    /** contains the separator that ended the token */
+    char thesep;
+
+    /** holds the separator that preceded the token */
+    char previoussep;
+
+    /** when using braces (in custom mode),
+	check this to get the number of unclosed braces. */
+    inline int bracingdepth() const{ return braceoftoken; };
+
+    /** use this to compare with instead of end of line \\n */
+    inline char eolToken() const{ return eoltoken; };
+
+    /** use this to compare with instead of whitespace */
+    inline char whiteToken() const{ return whitetoken; };
+};
+/** \example TokenIterator_test.cpp */
+/** \example CustomTokenIterator.cpp  */
+
+
+/**
+   The LexxStyleToken is returned by the \a LexxStyleTokenIterator .
+
+   \a ttype tells whether the token is a separator (T1_separator)
+   or a piece of text between separators (T1_string).
+
+   NOTE(review): presumably \a Tstring carries the text of a T1_string
+   token and \a Tchar the character of a T1_separator token; the code
+   that fills them in is not part of this header -- please confirm.
+
+*/
+struct LexxStyleToken{
+    enum Tokentype {T1_separator, T1_string};
+    Tokentype ttype;
+    string Tstring;
+    char Tchar;
+};
+
+/**
+   The \a LexxStyleTokenIterator is a wrapper around the
+   \a TokenIterator . It returns the separators and the parts
+   of the string that are separated by the separators,
+   alternately.
+*/
+class LexxStyleTokenIterator{
+private:
+    TokenIterator *base;
+    int state;
+public:
+    /**
+       The current token; reading this member
+       does not proceed to the next token.
+     */
+    LexxStyleToken thetoken;
+
+    /**
+       Wrap the TokenIterator in the LexxStyleTokenIterator.
+     */
+    LexxStyleTokenIterator(TokenIterator *Tbase);
+
+    /**
+       Return the next token.
+     */
+    LexxStyleToken& operator()();
+};
+
+
+/**
+   \a crop_token removes leading and trailing whitespace from a token;
+   a token that is empty or all whitespace yields the empty string.
+   \code
+   cout << crop_token( " \thead tail \t" ) << endl; // prints "head tail"
+   \endcode
+*/
+inline string crop_token(const string &s, const string whitespace=string(" \t") ){
+    const string::size_type left = s.find_first_not_of(whitespace);
+    if( left==string::npos )  // nothing to keep; string(s,npos,..) would throw
+	return string();
+    return string(s, left, s.find_last_not_of(whitespace)-left+1);
+}
+
+
+/** \a text_escape escapes newlines and escape characters
+    inside a string such that it may be read by the \a TokenIterator
+    in \a TokenIterator::Line or \a TokenIterator::Word Mode.
+
+    Every newline and every backslash in \a lines is prefixed with a
+    backslash; all other characters are copied unchanged.
+
+    \param lines  the text to escape
+    \return       the escaped copy of \a lines
+*/
+inline string text_escape(const string &lines)
+{
+    // string::size_type instead of unsigned int: the index type must be
+    // able to address every position of an arbitrarily long string.
+    const string::size_type length = lines.size();
+    string::size_type i, extra = 0;
+
+    // Pass 1: count the characters to escape so that the result can
+    // be allocated in one go.
+    for( i=0; i<length; ++i )
+	if( lines[i]=='\n' || lines[i]=='\\' )
+	    ++extra;
+
+    string result;
+    result.reserve( length+extra+1 );
+
+    // Pass 2: copy the text, putting a backslash in front of every
+    // newline and backslash.
+    for( i=0; i<length; ++i ) {
+	if( lines[i]=='\n' || lines[i]=='\\' )
+	    result += '\\';
+	result += lines[i];
+    }
+    return result;
+}
+
+#endif // !defined(__LIB_TEXT_HPP__)
diff --git a/libtext/tokenizer_test.cpp b/libtext/tokenizer_test.cpp
new file mode 100644
index 0000000..a421e50
--- /dev/null
+++ b/libtext/tokenizer_test.cpp
@@ -0,0 +1,60 @@
+/*
+ * $Source$
+ * $Revision$
+ * $Date$
+ *
+ * Copyright (c) 1999 by CyberSolutions GmbH.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by CyberSolutions GmbH.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <iostream>
+#include <list>
+
+#include "text.hpp"
+
+int
+main(int argc, char ** argv)
+{
+    // Split a sample buffer (runs of blanks, tabs, CR and LF between
+    // the words) and verify that exactly six tokens are found.
+    const string sample("this is a   test\n\n\n\r\tskfj     \t  blax\n");
+    list<string> tokens;
+    insert_iterator< list<string> > out(tokens, tokens.end());
+    tokenize(out, sample);
+    cout << "Found " << tokens.size() << " tokens." << endl;
+    if( tokens.size()==6 ) {
+	// Success: list the tokens, one per line.
+	copy(tokens.begin(), tokens.end(), ostream_iterator<string>(cout, "\n"));
+	return 0;
+    }
+    return 1;
+}
diff --git a/libtext/transform_text.c b/libtext/transform_text.c
index 717f97c..94293a6 100644
--- a/libtext/transform_text.c
+++ b/libtext/transform_text.c
@@ -1,10 +1,39 @@
 /*
- *      $Source$
- *      $Revision$
- *      $Date$
+ * $Source$
+ * $Revision$
+ * $Date$
  *
- *      Copyright (C) 1996 by CyberSolutions GmbH.
- *      All rights reserved.
+ * Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Peter Simons.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/types.h>
diff --git a/libtext/wordwrap.c b/libtext/wordwrap.c
index 1d4d1f7..7f9857f 100644
--- a/libtext/wordwrap.c
+++ b/libtext/wordwrap.c
@@ -1,10 +1,39 @@
 /*
- *      $Source$
- *      $Revision$
- *      $Date$
+ * $Source$
+ * $Revision$
+ * $Date$
  *
- *      Copyright (C) 1996,97 by CyberSolutions GmbH.
- *      All rights reserved.
+ * Copyright (c) 1996-99 by Peter Simons <simons@cys.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Peter Simons.
+ *
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <ctype.h>