381 lines
8.2 KiB
C++
381 lines
8.2 KiB
C++
/*
 * $Source$
 * $Revision$
 * $Date$
 *
 * Copyright (c) 1999 by CyberSolutions GmbH, Germany.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by CyberSolutions GmbH.
 *
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <strstream>

#include "text.hpp"
|
|
|
|
/**
 * Returns the zero-based index of the first occurrence of character s in
 * the NUL-terminated string c, or -1 when s does not occur.
 *
 * Unlike strchr(), the terminating NUL is never matched: searching for
 * '\0' yields -1.
 */
static int mystrpos(const char *c,char s){
  for(int i=0; c[i]; i++){
    if(c[i]==s)
      return i;
  }
  // Reached the terminator without a match.
  return -1;
}
|
|
|
|
// Sentinel buffer content: reset() stores it in `buffer`, and the non-const
// accessors compare against it to detect that no token has been read yet.
string TokenIterator::mooncheese("The Moon is A green cheese (sheesh!).");
|
|
|
|
/**
 * Puts the iterator into its pristine state: no stream attached, empty
 * brace stack, no separator seen, and the sentinel text in the buffer.
 *
 * The stream pointer is simply overwritten; nothing is deleted here.
 */
void TokenIterator::reset(){
  // Detach from any stream.
  i = 0;

  // Brace tracking starts at nesting depth zero.
  brace = 0;
  braceoftoken = 0;
  bracestack[0] = '\0';

  // No separator has been encountered yet.
  previoussep = '\0';
  thesep = '\0';

  // Pseudo-separators reported for end-of-line and whitespace.
  eoltoken = '\n';
  whitetoken = ' ';

  // Mark the buffer as "nothing read so far".
  buffer = mooncheese;
}
|
|
|
|
/**
 * Selects the tokenizing mode and loads the matching character tables.
 *
 *  - Word: blank and tab are whitespace; double quotes act as braces.
 *  - Line: all tables empty.
 *  - RFC:  all tables empty except `continuation`, which holds blank and tab.
 *  - any other value: blank/tab whitespace, the separator set ",;:+-=/\@"
 *    and the brace pairs "" () [] {} <>.
 *
 * Every mode uses the backslash as escape character.
 */
void TokenIterator::setMode(Mode m){
  mode = m;

  // Identical in all modes.
  escapechar = '\\';

  if(mode == Word){
    whitespace = " \t";
    separator = "";
    continuation = "";
    leftbrace = "\"";
    rightbrace = "\"";
  }else if(mode == Line){
    whitespace = "";
    separator = "";
    continuation = "";
    leftbrace = "";
    rightbrace = "";
  }else if(mode == RFC){
    whitespace = "";
    separator = "";
    continuation = " \t";
    leftbrace = "";
    rightbrace = "";
  }else{
    whitespace = " \t";
    separator = ",;:+-=/\\@";
    continuation = "";
    leftbrace = "\"([{<";
    rightbrace = "\")]}>";
  }
}
|
|
|
|
/**
 * Default constructor: an iterator in Word mode with brace handling
 * switched off and no stream attached (reset() leaves the stream pointer
 * null), so it owns nothing.
 */
TokenIterator::TokenIterator(){
  ismyistream = false;
  braces = false;
  reset();
  setMode(Word);
}
|
|
|
|
|
|
|
|
// Shared default-constructed (stream-less) iterator; begin() returns it
// when there is nothing to read.
TokenIterator TokenIterator::finalIterator;
|
|
|
|
//TokenIterator::TokenIterator(string s, Mode m=Word, bool b=false){
|
|
/**
 * Constructs an iterator that tokenizes the text s.
 *
 * @param s  text to tokenize; the iterator keeps its own copy
 * @param m  tokenizing mode, forwarded to setMode()
 * @param b  whether brace handling is enabled
 *
 * The stream is allocated here and owned by this object (ismyistream is
 * set, so the destructor deletes it). No token is read yet.
 */
TokenIterator::TokenIterator(string s, Mode m, bool b){
  reset();
  braces= b;
  setMode(m);

  // std::istringstream stores a private copy of the text. The former
  // istrstream(s.c_str()) only referenced the character array of the
  // by-value parameter `s`, which is destroyed when this constructor
  // returns, leaving the stream reading freed memory.
  i= new std::istringstream(s);
  ismyistream= true;
  //++(*this);// read first value (not done; makes this unwieldy)
}
|
|
|
|
//TokenIterator::TokenIterator(istream &is, Mode m=Word, bool b=false){
|
|
/**
 * Constructs an iterator reading from a caller-supplied stream.
 *
 * @param is  stream to tokenize; only its address is stored and it is
 *            never deleted by this object (ismyistream is false)
 * @param m   tokenizing mode, forwarded to setMode()
 * @param b   whether brace handling is enabled
 *
 * No token is read yet.
 */
TokenIterator::TokenIterator(istream &is, Mode m, bool b){
  reset();
  setMode(m);
  braces = b;

  // Borrow the caller's stream (must come after reset(), which nulls i).
  i = &is;
  ismyistream = false;
}
|
|
|
|
/**
 * Destroys the stream if, and only if, this object allocated it.
 *
 * NOTE(review): no copy constructor is defined in this file; if the class
 * relies on the implicit one, copies of an owning iterator share the same
 * pointer and flag -- confirm against the class declaration.
 */
TokenIterator::~TokenIterator(){
  if(!ismyistream)
    return;
  delete i;
}
|
|
|
|
|
|
/**
 * Returns this iterator itself (constness cast away) while a stream is
 * attached, in good state and not at end-of-file; otherwise returns the
 * shared finalIterator.
 */
TokenIterator::iterator& TokenIterator::begin() const
{
  // Nothing (more) to read: hand out the shared end marker.
  if( !i || !i->good() || i->eof() )
    return finalIterator;

  return *const_cast<TokenIterator*>(this);
}
|
|
|
|
|
|
//! from Input Iterator
|
|
//! Returns the next object in the stream.
|
|
TokenIterator::operator string() const
|
|
{
|
|
return buffer;
|
|
};
|
|
|
|
|
|
//! from Input Iterator
|
|
//! Returns the next object in the stream.
|
|
TokenIterator::operator string()
|
|
{
|
|
if( buffer== mooncheese )
|
|
(*this)();
|
|
return buffer;
|
|
};
|
|
|
|
|
|
//! from Input Iterator
|
|
//! Returns the next object in the stream.
|
|
const string TokenIterator::operator*() const
|
|
{
|
|
return buffer;
|
|
};
|
|
|
|
|
|
//! from Input Iterator
|
|
//! Returns the next object in the stream.
|
|
const string TokenIterator::operator*()
|
|
{
|
|
if( buffer== mooncheese )
|
|
(*this)();
|
|
return buffer;
|
|
};
|
|
|
|
|
|
//! from Input Iterator
|
|
//! Preincrement.
|
|
TokenIterator& TokenIterator::operator++()
|
|
{
|
|
(*this)(); return *this;
|
|
};
|
|
|
|
//! from Input Iterator
|
|
//! Postincrement.
|
|
//! this works .. almost
|
|
|
|
|
|
TokenIterator& TokenIterator::operator++(int i)
|
|
{
|
|
static TokenIterator t = *this;
|
|
while(i>0){ --i; (*this)++; }
|
|
return t;
|
|
};
|
|
|
|
|
|
/** compare not equal */
|
|
bool TokenIterator::operator != (TokenIterator &R) const{// const & I say, const
|
|
// note: const TokenIterator &R will create a copy of R :-(
|
|
// this can't work; have to allow use of const in the above
|
|
// has to be compared differently( endflags .. ! )
|
|
|
|
return &R!= this;
|
|
}
|
|
|
|
/** compare two Tokenizers */
|
|
bool TokenIterator::operator == (TokenIterator &R) const{
|
|
// note: const TokenIterator &R will create a copy of R :-(
|
|
// this can't work; have to allow use of const in the above
|
|
// has to be compared differently( endflags .. ! )
|
|
|
|
return !( *this != R );
|
|
}
|
|
|
|
/** need this for foreach template */
|
|
bool TokenIterator::operator ! (void) const{
|
|
return !( i && i->good() && !i->eof() );
|
|
}
|
|
|
|
/** need this for fun */
|
|
bool TokenIterator::hastoken(void) const{
|
|
return i && i->good() && !i->eof();
|
|
}
|
|
|
|
|
|
|
|
/**
 * Recognises a line terminator that starts with the already-read
 * character c.
 *
 * If c is '\r' or '\n', one more character is read from *i: when it is the
 * complementary character ('\n' after '\r', or '\r' after '\n') it is
 * swallowed as part of the same line break, otherwise it is pushed back
 * with unget(). Returns true for '\r' / '\n' and false (leaving the stream
 * untouched) for any other character.
 */
inline bool linefeed(char c, istream *i){
  // Which character would complete a two-character line break?
  char partner;
  if(c=='\r')
    partner= '\n';   /* dos line feed */
  else if(c=='\n')
    partner= '\r';   /* carriage return after line feed(?) */
  else
    return false;

  char next;
  if( i->get(next) && next!=partner )
    i->unget();

  return true;
}
|
|
|
|
|
|
/**
 * Reads the next token from the attached stream.
 *
 * The token text is accumulated in `buffer`, which is also returned by
 * value. `previoussep` receives the separator that ended the previous
 * token; `thesep` is set to the separator that ended this one (a
 * separator or brace character, `whitetoken` for whitespace, `eoltoken`
 * for a line break). When the stream runs dry the text collected so far
 * is returned and `thesep` is left unchanged.
 *
 * If no stream is attached (default-constructed iterator), an empty token
 * is returned instead of dereferencing the null stream pointer.
 *
 * NOTE(review): strchr() also matches the terminating NUL of its table,
 * so a '\0' byte in the input is treated like whitespace (or a separator
 * while inside braces).
 * NOTE(review): bracestack is written without a bounds check; its
 * capacity is declared outside this file.
 */
string TokenIterator::operator()(){
  char c= 0;
  int pos;

  previoussep= thesep;
  buffer= string("");

  // No stream attached (e.g. finalIterator): nothing to read.
  if( !i )
    return buffer;

  while( i->get(c) ){

    if(c==escapechar){

      char d;// special translations need to be plugged in here

      // The escaped character is taken literally; inside braces an
      // escaped line break is normalised to a single '\n'.
      if( i->get(d) ){
        if( brace && linefeed(d,i) )
          buffer+= '\n';
        else
          buffer+= d;
      }
    }

    else if( linefeed(c,i) ){

      thesep= eoltoken;

      switch(mode){
      case Word:
        // Skip an empty token that merely follows whitespace.
        if( previoussep!=whitetoken || buffer.length() )// space" = "
          return buffer;
        break;

      case Line:
        return buffer;

      case RFC:
        {
          // Look at the first character of the next line: the line
          // continues this token only if it starts with a continuation
          // character. The character is pushed back either way.
          char d;
          if( i->get(d) ){
            i->unget();
            if( !strchr(continuation,d) )
              return buffer;
          }

          // Swallow the run of continuation characters.
          do{
            if(!i->get(d)){ return buffer; }
          }while( strchr(continuation,d) );

          //should "A\n \tB" be returned as one token "AB" or as "A B" ?
          // currently, "AB" is returned
          i->unget();// give back the first character after the run
        }
        break;

      default:
        return buffer;
      }
    }

    else if( !(brace) && strchr(whitespace,c) ){ // brace>0 implies braces==true

      if(buffer.length()){
        thesep= whitetoken;
        return buffer;// send token
      }else
        previoussep= whitetoken;// !?
      /* leading whitespace is skipped */

    }else if(strchr(separator,c)){

      thesep= c;
      if( previoussep!=whitetoken || buffer.length() )// space" = "
        return buffer;// send token

    }else if(brace>0 && bracestack[brace]==c){

      /* closing brace */
      braceoftoken= brace;
      brace--; /* pop stack of braces */

      thesep= c;
      return buffer;// send token

    }else if( braces && (pos=mystrpos(leftbrace,c), pos>=0) ){

      /* opening brace: remember which closing character to expect */
      braceoftoken= brace;
      bracestack[++brace]= rightbrace[pos];
      if( previoussep!=whitetoken || buffer.length() ){// space" = "
        thesep= c;
        return buffer;// send token
      }

    }else{
      /* normal, append to token */
      buffer+= c;
    }
  }

  // End of input: return whatever has been collected.
  return buffer;
}
|
|
|
|
|
|
/**
 * Wraps an existing TokenIterator. Only the pointer is stored; the state
 * flag starts at 0.
 */
LexxStyleTokenIterator::LexxStyleTokenIterator(TokenIterator *Tbase){
  base = Tbase;
  state = 0;
}
|
|
|
|
/**
 * Delivers token text and separators alternately.
 *
 * Each call flips `state`. When the flipped state is non-zero, the next
 * token is read from the wrapped TokenIterator and stored in
 * thetoken.Tstring; when it is zero, the separator that ended that token
 * (base->thesep) is stored in thetoken.Tchar. thetoken.ttype is the state
 * value cast to LexxStyleToken::Tokentype. Returns a reference to the
 * internal token object, which is overwritten by the next call.
 */
LexxStyleToken& LexxStyleTokenIterator::operator()(){
  state = !state;
  thetoken.ttype = (LexxStyleToken::Tokentype)state;

  if(!state){
    // Separator phase: report what terminated the previous token.
    thetoken.Tchar = base->thesep;
    return thetoken;
  }

  // Text phase: pull the next token from the underlying iterator.
  thetoken.Tstring = (*base)();
  return thetoken;
}
|