966 lines
24 KiB
C++
966 lines
24 KiB
C++
|
|
// +-------------------------------------------------------------------------+
|
||
|
|
// | StreamFilters library vers. 0.2.03 |
|
||
|
|
// | Copyright (c) Andrey V. Stolyarov <croco at croco dot net> 2022-2025 |
|
||
|
|
// | ----------------------------------------------------------------------- |
|
||
|
|
// | This is free software. Permission is granted to everyone to use, copy |
|
||
|
|
// | or modify this software under the terms and conditions of |
|
||
|
|
// | GNU LESSER GENERAL PUBLIC LICENSE, v. 2.1 |
|
||
|
|
// | as published by Free Software Foundation (see the file LGPL.txt) |
|
||
|
|
// | |
|
||
|
|
// | Please visit http://www.croco.net/software/stfilter to get a fresh copy |
|
||
|
|
// | ----------------------------------------------------------------------- |
|
||
|
|
// | This code is provided strictly and exclusively on the "AS IS" basis. |
|
||
|
|
// | !!! THERE IS NO WARRANTY OF ANY KIND, NEITHER EXPRESSED NOR IMPLIED !!! |
|
||
|
|
// +-------------------------------------------------------------------------+
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
#include <string.h> // for strlen, str[i]cmp
|
||
|
|
|
||
|
|
#include "stfhtml.hpp"
|
||
|
|
|
||
|
|
|
||
|
|
// Builds a tag filter in front of the given downstream filter.
//   anames -- 0-terminated array of permitted tag names; a null pointer
//             lets every tag name through (see HandleTagName)
//   aattrs -- 0-terminated array of permitted attributes, each entry
//             being either "attr" or "tag=attr"; a null pointer permits
//             every attribute (see attrname_allowed)
//   next   -- handed over to the StreamFilter base
// NL replacing stays off (pre_tags == 0) until AddControlledNLReplacer()
// is called.
StreamFilterHtmlTags::
StreamFilterHtmlTags(const char * const * anames,
                     const char * const * aattrs,
                     StreamFilter *next)
    : StreamFilter(next), names(anames), attrs(aattrs), pre_tags(0),
    astate(start), qstate(unquoted)
{
    // both are assigned elsewhere before the visible code reads them,
    // but a freshly constructed object should not carry indeterminate
    // values around
    pre_level = 0;
    negtag = false;
}
|
||
|
|
|
||
|
|
// The destructor body is empty: the only statements it ever contained
// were already compiled out with #if 0.
StreamFilterHtmlTags::~StreamFilterHtmlTags()
{
}
|
||
|
|
|
||
|
|
// please note we don't use isalpha(3) function family
|
||
|
|
// as well as str[n]casecmp(3)
|
||
|
|
// so that we don't suck in the damn locale infrastructure
|
||
|
|
|
||
|
|
// Locale-independent whitespace test: yields 1 for space, tab, CR, LF,
// vertical tab and form feed, and 0 for any other value.
static int isspc(int c)
{
    switch(c) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
    case '\v':
    case '\f':
        return 1;
    default:
        return 0;
    }
}
|
||
|
|
|
||
|
|
// Tells whether c may appear in a tag name: ASCII letters, digits,
// underscore, colon and dot qualify.  Returns 1 or 0.
static int is_tagnamechar(int c)
{
    if(c >= '0' && c <= '9')
        return 1;
    if(c >= 'a' && c <= 'z')
        return 1;
    if(c >= 'A' && c <= 'Z')
        return 1;
    return c == '_' || c == ':' || c == '.';
}
|
||
|
|
|
||
|
|
// ASCII-only lower-casing; anything outside 'A'..'Z' is returned as is.
static int lowcase(int c)
{
    if(c < 'A' || c > 'Z')
        return c;
    return c + ('a' - 'A');
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::FeedChar(int c)
|
||
|
|
{
|
||
|
|
if(qstate != unquoted) {
|
||
|
|
if(astate == altag_goodattr)
|
||
|
|
PutChar(c);
|
||
|
|
if((qstate == single_quoted && c == '\'') ||
|
||
|
|
(qstate == double_quoted && c == '\"'))
|
||
|
|
{
|
||
|
|
qstate = unquoted;
|
||
|
|
}
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
// once we're here, this means we're in the unquoted state
|
||
|
|
switch(astate) {
|
||
|
|
case start: HandleStart(c); break;
|
||
|
|
case just_started_tag: HandleJustStartedTag(c); break;
|
||
|
|
case unnamed_tag: HandleUnnamedTag(c); break;
|
||
|
|
case tag_name: HandleTagName(c); break;
|
||
|
|
case allowed_tag: HandleAllowedTag(c); break;
|
||
|
|
case altag_attrname: HandleAltagAttrname(c); break;
|
||
|
|
case altag_expect_eq: HandleAltagExpectEq(c); break;
|
||
|
|
case altag_expect_val: HandleAltagExpectVal(c); break;
|
||
|
|
case altag_badattr: HandleAltagBadAttr(c); break;
|
||
|
|
case altag_goodattr: HandleAltagGoodAttr(c); break;
|
||
|
|
case altag_slashclose: HandleAltagSlashClose(c); break;
|
||
|
|
case prohibited_tag: HandleProhibitedTag(c); break;
|
||
|
|
case comment_in_excl: HandleCommentInExcl(c); break;
|
||
|
|
case comment_in_dash: HandleCommentInDash(c); break;
|
||
|
|
case comment: HandleComment(c); break;
|
||
|
|
case comment_out_dash: HandleCommentOutDash(c); break;
|
||
|
|
case comment_out_dash2: HandleCommentOutDash2(c); break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::FeedEnd()
|
||
|
|
{
|
||
|
|
if(astate == allowed_tag || astate == altag_expect_eq ||
|
||
|
|
astate == altag_expect_val || astate == altag_badattr ||
|
||
|
|
astate == altag_goodattr || astate == altag_slashclose)
|
||
|
|
{
|
||
|
|
if(astate == altag_goodattr) {
|
||
|
|
switch(qstate) {
|
||
|
|
case single_quoted:
|
||
|
|
PutChar('\'');
|
||
|
|
break;
|
||
|
|
case double_quoted:
|
||
|
|
PutChar('\"');
|
||
|
|
break;
|
||
|
|
case unquoted:
|
||
|
|
;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if(astate == altag_slashclose)
|
||
|
|
PutChar('/');
|
||
|
|
PutChar('>');
|
||
|
|
}
|
||
|
|
PutEnd();
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::Reset()
|
||
|
|
{
|
||
|
|
astate = start;
|
||
|
|
qstate = unquoted;
|
||
|
|
pre_level = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::
|
||
|
|
AddControlledNLReplacer(const char * const *pt, bool texstyle)
|
||
|
|
{
|
||
|
|
pre_tags = pt;
|
||
|
|
StreamFilterHtmlReplaceNL *nlr =
|
||
|
|
new StreamFilterHtmlReplaceNL(texstyle, 0);
|
||
|
|
InsertAfter(nlr);
|
||
|
|
pre_level = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleStart(int c)
|
||
|
|
{
|
||
|
|
if(c == '<') {
|
||
|
|
NLModeBlock(true);
|
||
|
|
astate = just_started_tag;
|
||
|
|
negtag = false;
|
||
|
|
namebuf.Reset();
|
||
|
|
} else {
|
||
|
|
PutChar(c);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleJustStartedTag(int c)
|
||
|
|
{
|
||
|
|
if(c == '!') {
|
||
|
|
astate = comment_in_excl;
|
||
|
|
} else
|
||
|
|
if(c == '/' || is_tagnamechar(c)) {
|
||
|
|
astate = unnamed_tag;
|
||
|
|
HandleUnnamedTag(c);
|
||
|
|
} else {
|
||
|
|
PutStr("<");
|
||
|
|
PutChar(c);
|
||
|
|
astate = start;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
static bool tagname_eq(const char *a, const char *b)
|
||
|
|
{
|
||
|
|
while(*a && *b) {
|
||
|
|
if(lowcase(*a) != lowcase(*b))
|
||
|
|
return 0;
|
||
|
|
a++;
|
||
|
|
b++;
|
||
|
|
}
|
||
|
|
return !*a && !*b;
|
||
|
|
}
|
||
|
|
|
||
|
|
static bool tagname_is_there(const char *name, const char * const *array)
|
||
|
|
{
|
||
|
|
int i;
|
||
|
|
for(i = 0; array[i]; i++)
|
||
|
|
if(tagname_eq(array[i], name))
|
||
|
|
return true;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleUnnamedTag(int c)
|
||
|
|
{
|
||
|
|
switch(c) {
|
||
|
|
case '/':
|
||
|
|
if(negtag) // more than one '/' before the name is too much
|
||
|
|
astate = prohibited_tag;
|
||
|
|
else
|
||
|
|
negtag = true;
|
||
|
|
break;
|
||
|
|
case '>':
|
||
|
|
astate = start; // just ignore the malformed tag
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
if(isspc(c))
|
||
|
|
break;
|
||
|
|
if(is_tagnamechar(c)) {
|
||
|
|
astate = tag_name;
|
||
|
|
namebuf.Reset();
|
||
|
|
namebuf.AddChar(c);
|
||
|
|
} else {
|
||
|
|
astate = prohibited_tag;
|
||
|
|
ProcessCharInsideSkippedTag(c);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleTagName(int c)
|
||
|
|
{
|
||
|
|
// correct delimiter after the name may be either space, or '>', or "/>"
|
||
|
|
if(isspc(c) || c == '>' || c == '/') {
|
||
|
|
if(!names || tagname_is_there(namebuf.Get(), names)) {
|
||
|
|
astate = allowed_tag;
|
||
|
|
if(!negtag)
|
||
|
|
UpdateNLMode();
|
||
|
|
PutChar('<');
|
||
|
|
if(negtag)
|
||
|
|
PutChar('/');
|
||
|
|
PutStr(namebuf.Get());
|
||
|
|
if(c == '>')
|
||
|
|
CloseAllowedTag();
|
||
|
|
else
|
||
|
|
if(c == '/')
|
||
|
|
astate = altag_slashclose;
|
||
|
|
} else {
|
||
|
|
astate = prohibited_tag;
|
||
|
|
if(c == '>') {
|
||
|
|
NLModeBlock(false);
|
||
|
|
astate = start;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
} else
|
||
|
|
if(is_tagnamechar(c)) {
|
||
|
|
namebuf.AddChar(c);
|
||
|
|
} else {
|
||
|
|
astate = prohibited_tag;
|
||
|
|
ProcessCharInsideSkippedTag(c);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleAllowedTag(int c)
|
||
|
|
{
|
||
|
|
switch(c) {
|
||
|
|
case ' ':
|
||
|
|
case '\t':
|
||
|
|
case '\n':
|
||
|
|
case '\r':
|
||
|
|
break;
|
||
|
|
case '=':
|
||
|
|
attrbuf.Reset(); // no name, so the ``value'' will get ignored
|
||
|
|
astate = altag_expect_val;
|
||
|
|
break;
|
||
|
|
case '<':
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '\'':
|
||
|
|
qstate = single_quoted;
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '\"':
|
||
|
|
qstate = double_quoted;
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '/':
|
||
|
|
astate = altag_slashclose;
|
||
|
|
break;
|
||
|
|
case '>':
|
||
|
|
CloseAllowedTag();
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
attrbuf.Reset();
|
||
|
|
attrbuf.AddChar(c);
|
||
|
|
astate = altag_attrname;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
static bool attrname_allowed(const char *tag, const char *attr,
|
||
|
|
const char * const *array)
|
||
|
|
{
|
||
|
|
if(!array)
|
||
|
|
return true;
|
||
|
|
|
||
|
|
bool res;
|
||
|
|
char *qual;
|
||
|
|
int qlen = strlen(tag) + 1 + strlen(attr) + 1;
|
||
|
|
qual = new char[qlen];
|
||
|
|
strcpy(qual, tag);
|
||
|
|
strcat(qual, "=");
|
||
|
|
strcat(qual, attr);
|
||
|
|
|
||
|
|
int i;
|
||
|
|
for(i = 0; array[i]; i++) {
|
||
|
|
if(tagname_eq(array[i], attr) || tagname_eq(array[i], qual)) {
|
||
|
|
res = true;
|
||
|
|
goto quit;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
res = false;
|
||
|
|
quit:
|
||
|
|
delete[] qual;
|
||
|
|
return res;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleAltagAttrname(int c)
|
||
|
|
{
|
||
|
|
switch(c) {
|
||
|
|
case ' ':
|
||
|
|
case '\t':
|
||
|
|
case '\n':
|
||
|
|
case '\r':
|
||
|
|
astate = altag_expect_eq;
|
||
|
|
break;
|
||
|
|
case '=':
|
||
|
|
astate = altag_expect_val;
|
||
|
|
break;
|
||
|
|
case '<':
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '\'':
|
||
|
|
qstate = single_quoted;
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '\"':
|
||
|
|
qstate = double_quoted;
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '/':
|
||
|
|
CommitAttrName();
|
||
|
|
astate = altag_slashclose;
|
||
|
|
break;
|
||
|
|
case '>':
|
||
|
|
CommitAttrName();
|
||
|
|
CloseAllowedTag();
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
attrbuf.AddChar(c);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleAltagExpectEq(int c)
|
||
|
|
{
|
||
|
|
switch(c) {
|
||
|
|
case ' ':
|
||
|
|
case '\t':
|
||
|
|
case '\n':
|
||
|
|
case '\r':
|
||
|
|
break;
|
||
|
|
case '=':
|
||
|
|
astate = altag_expect_val;
|
||
|
|
break;
|
||
|
|
case '<':
|
||
|
|
CommitAttrName();
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '\'':
|
||
|
|
CommitAttrName();
|
||
|
|
qstate = single_quoted;
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '\"':
|
||
|
|
CommitAttrName();
|
||
|
|
qstate = double_quoted;
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '/':
|
||
|
|
CommitAttrName();
|
||
|
|
astate = altag_slashclose;
|
||
|
|
break;
|
||
|
|
case '>':
|
||
|
|
CommitAttrName();
|
||
|
|
CloseAllowedTag();
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
CommitAttrName();
|
||
|
|
attrbuf.Reset();
|
||
|
|
attrbuf.AddChar(c);
|
||
|
|
astate = altag_attrname;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleAltagExpectVal(int c)
|
||
|
|
{
|
||
|
|
switch(c) {
|
||
|
|
case ' ':
|
||
|
|
case '\t':
|
||
|
|
case '\n':
|
||
|
|
case '\r':
|
||
|
|
return;
|
||
|
|
case '=':
|
||
|
|
case '<':
|
||
|
|
astate = altag_badattr;
|
||
|
|
return;
|
||
|
|
case '\'':
|
||
|
|
qstate = single_quoted;
|
||
|
|
break;
|
||
|
|
case '\"':
|
||
|
|
qstate = double_quoted;
|
||
|
|
break;
|
||
|
|
case '/':
|
||
|
|
/* CommitAttrName(); */ // no commit! malformed attr
|
||
|
|
astate = altag_slashclose;
|
||
|
|
return;
|
||
|
|
case '>':
|
||
|
|
/* CommitAttrName(); */ // no commit! malformed attr
|
||
|
|
CloseAllowedTag();
|
||
|
|
return;
|
||
|
|
default:
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool good = CommitAttrName();
|
||
|
|
if(good) {
|
||
|
|
PutChar('=');
|
||
|
|
PutChar(c);
|
||
|
|
astate = altag_goodattr;
|
||
|
|
} else {
|
||
|
|
astate = altag_badattr;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleAltagBadAttr(int c)
|
||
|
|
{
|
||
|
|
switch(c) {
|
||
|
|
case ' ':
|
||
|
|
case '\t':
|
||
|
|
case '\n':
|
||
|
|
case '\r':
|
||
|
|
astate = allowed_tag;
|
||
|
|
break;
|
||
|
|
case '=':
|
||
|
|
case '<':
|
||
|
|
break;
|
||
|
|
case '\'':
|
||
|
|
qstate = single_quoted;
|
||
|
|
break;
|
||
|
|
case '\"':
|
||
|
|
qstate = double_quoted;
|
||
|
|
break;
|
||
|
|
case '/':
|
||
|
|
astate = altag_slashclose;
|
||
|
|
break;
|
||
|
|
case '>':
|
||
|
|
CloseAllowedTag();
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleAltagGoodAttr(int c)
|
||
|
|
{
|
||
|
|
switch(c) {
|
||
|
|
case ' ':
|
||
|
|
case '\t':
|
||
|
|
case '\n':
|
||
|
|
case '\r':
|
||
|
|
astate = allowed_tag;
|
||
|
|
break;
|
||
|
|
case '=':
|
||
|
|
case '<':
|
||
|
|
astate = altag_badattr;
|
||
|
|
break;
|
||
|
|
case '\'':
|
||
|
|
PutChar(c);
|
||
|
|
qstate = single_quoted;
|
||
|
|
break;
|
||
|
|
case '\"':
|
||
|
|
PutChar(c);
|
||
|
|
qstate = double_quoted;
|
||
|
|
break;
|
||
|
|
case '/':
|
||
|
|
astate = altag_slashclose;
|
||
|
|
break;
|
||
|
|
case '>':
|
||
|
|
CloseAllowedTag();
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
PutChar(c);
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleAltagSlashClose(int c)
|
||
|
|
{
|
||
|
|
if(c == '>') {
|
||
|
|
PutChar(' ');
|
||
|
|
PutChar('/');
|
||
|
|
CloseAllowedTag();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleProhibitedTag(int c)
|
||
|
|
{
|
||
|
|
if(qstate == unquoted && c == '\n') {
|
||
|
|
PutChar('\n');
|
||
|
|
astate = start;
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
ProcessCharInsideSkippedTag(c);
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleCommentInExcl(int c)
|
||
|
|
{
|
||
|
|
if(c == '-') {
|
||
|
|
astate = comment_in_dash;
|
||
|
|
} else {
|
||
|
|
// this is incorrect comment start! let's handle this... somehow
|
||
|
|
PutStr("<!");
|
||
|
|
PutChar(c);
|
||
|
|
astate = start;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleCommentInDash(int c)
|
||
|
|
{
|
||
|
|
if(c == '-') {
|
||
|
|
astate = comment;
|
||
|
|
} else {
|
||
|
|
// this is incorrect comment start! let's handle this... somehow
|
||
|
|
PutStr("<!-");
|
||
|
|
PutChar(c);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleComment(int c)
|
||
|
|
{
|
||
|
|
if(c == '-') {
|
||
|
|
astate = comment_out_dash;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleCommentOutDash(int c)
|
||
|
|
{
|
||
|
|
if(c == '-') {
|
||
|
|
astate = comment_out_dash2;
|
||
|
|
} else {
|
||
|
|
astate = comment;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::HandleCommentOutDash2(int c)
|
||
|
|
{
|
||
|
|
switch(c) {
|
||
|
|
case '>':
|
||
|
|
astate = start;
|
||
|
|
NLModeBlock(false);
|
||
|
|
break;
|
||
|
|
case '-': // well, let's just remain waiting for ``-->''
|
||
|
|
// actually, this may be smth. like ``--------->''
|
||
|
|
break;
|
||
|
|
default: // failed to quit the comment :-)
|
||
|
|
astate = comment;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::ProcessCharInsideSkippedTag(int c)
|
||
|
|
{
|
||
|
|
if(c == '\'')
|
||
|
|
qstate = single_quoted;
|
||
|
|
else
|
||
|
|
if(c == '\"')
|
||
|
|
qstate = double_quoted;
|
||
|
|
else
|
||
|
|
if(c == '>') {
|
||
|
|
astate = start;
|
||
|
|
NLModeBlock(false);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
bool StreamFilterHtmlTags::CommitAttrName()
|
||
|
|
{
|
||
|
|
bool res = attrname_allowed(namebuf.Get(), attrbuf.Get(), attrs);
|
||
|
|
if(res) {
|
||
|
|
PutChar(' ');
|
||
|
|
PutStr(attrbuf.Get());
|
||
|
|
}
|
||
|
|
return res;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::CloseAllowedTag()
|
||
|
|
{
|
||
|
|
PutChar('>');
|
||
|
|
NLModeBlock(false);
|
||
|
|
if(negtag)
|
||
|
|
UpdateNLMode();
|
||
|
|
astate = start;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::NLModeBlock(bool b)
|
||
|
|
{
|
||
|
|
if(!pre_tags)
|
||
|
|
return;
|
||
|
|
StreamFilterHtmlReplaceNL *replacer =
|
||
|
|
static_cast<StreamFilterHtmlReplaceNL*>(GetNext());
|
||
|
|
if(b)
|
||
|
|
replacer->TagStart();
|
||
|
|
else
|
||
|
|
replacer->TagEnd();
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlTags::UpdateNLMode()
|
||
|
|
{
|
||
|
|
if(!pre_tags)
|
||
|
|
return;
|
||
|
|
StreamFilterHtmlReplaceNL *replacer =
|
||
|
|
static_cast<StreamFilterHtmlReplaceNL*>(GetNext());
|
||
|
|
if(tagname_is_there(namebuf.Get(), pre_tags)) {
|
||
|
|
if(negtag) {
|
||
|
|
if(pre_level > 0)
|
||
|
|
pre_level--;
|
||
|
|
if(pre_level == 0)
|
||
|
|
replacer->Enable();
|
||
|
|
} else {
|
||
|
|
if(pre_level == 0)
|
||
|
|
replacer->Disable();
|
||
|
|
pre_level++;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::FeedChar(int c)
|
||
|
|
{
|
||
|
|
if(blocked && state == start && c == '<') {
|
||
|
|
// VERY special case, when a paragraph starts with a tag
|
||
|
|
// which (in the present version) is not marked as disabling
|
||
|
|
// the NL replacing (but for future versions: with a tag which
|
||
|
|
// may appear inside a paragraph)
|
||
|
|
PutStr("<p>");
|
||
|
|
state = plain;
|
||
|
|
PutChar('<');
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
if(blocked) {
|
||
|
|
PutChar(c);
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
if(c == '\r') {
|
||
|
|
out_cr = true; // yes, we will transmit CRs for all NLs, ...
|
||
|
|
return; // but we ignore them on input
|
||
|
|
}
|
||
|
|
switch(state) {
|
||
|
|
case start:
|
||
|
|
if(isspc(c)) {
|
||
|
|
XPutChar(c);
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
// non-whitespace found!
|
||
|
|
PutStr("<p>");
|
||
|
|
XPutChar(c);
|
||
|
|
state = plain;
|
||
|
|
break;
|
||
|
|
case plain:
|
||
|
|
if(c == '\n') {
|
||
|
|
state = after_single_nl;
|
||
|
|
lead_spaces = 0;
|
||
|
|
if(texstyle)
|
||
|
|
PutNL();
|
||
|
|
} else {
|
||
|
|
XPutChar(c);
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
case after_single_nl:
|
||
|
|
if(c == '\n') {
|
||
|
|
PutStr("</p>");
|
||
|
|
PutNL();
|
||
|
|
state = start;
|
||
|
|
} else
|
||
|
|
if(isspc(c)) {
|
||
|
|
lead_spaces++;
|
||
|
|
if(texstyle)
|
||
|
|
XPutChar(c);
|
||
|
|
} else {
|
||
|
|
CommitSingleNL();
|
||
|
|
XPutChar(c);
|
||
|
|
state = plain;
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
case disabled:
|
||
|
|
XPutChar(c);
|
||
|
|
};
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::FeedEnd()
|
||
|
|
{
|
||
|
|
if(state != start && state != disabled && !blocked) {
|
||
|
|
// at least one char has been output!
|
||
|
|
PutStr("</p>");
|
||
|
|
PutNL();
|
||
|
|
}
|
||
|
|
|
||
|
|
/* the only ``unfinished'' situation is after 1 NL, but, well,
|
||
|
|
we don't want any trailing spaces anyway
|
||
|
|
*/
|
||
|
|
|
||
|
|
PutEnd();
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::Reset()
|
||
|
|
{
|
||
|
|
state = start;
|
||
|
|
blocked = false;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::TagStart()
|
||
|
|
{
|
||
|
|
if(state == after_single_nl) {
|
||
|
|
CommitSingleNL();
|
||
|
|
state = plain;
|
||
|
|
}
|
||
|
|
blocked = true;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::TagEnd()
|
||
|
|
{
|
||
|
|
blocked = false;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::Enable()
|
||
|
|
{
|
||
|
|
state = start;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::Disable()
|
||
|
|
{
|
||
|
|
if(state != start) { // at least one char was output!
|
||
|
|
PutStr("</p>");
|
||
|
|
PutNL();
|
||
|
|
}
|
||
|
|
state = disabled;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::XPutChar(int c)
|
||
|
|
{
|
||
|
|
if(c == '\r')
|
||
|
|
return;
|
||
|
|
if(out_cr && c == '\n')
|
||
|
|
PutChar('\r');
|
||
|
|
PutChar(c);
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::PutNL()
|
||
|
|
{
|
||
|
|
if(out_cr)
|
||
|
|
PutChar('\r');
|
||
|
|
PutChar('\n');
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterHtmlReplaceNL::CommitSingleNL()
|
||
|
|
{
|
||
|
|
if(texstyle)
|
||
|
|
return;
|
||
|
|
PutStr("<br />");
|
||
|
|
PutNL();
|
||
|
|
int i;
|
||
|
|
for(i = 0; i < lead_spaces; i++)
|
||
|
|
PutChar(' ');
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
void StreamFilterHtmlProtect::FeedChar(int c)
|
||
|
|
{
|
||
|
|
switch(c) {
|
||
|
|
case '>': PutStr(">"); break;
|
||
|
|
case '<': PutStr("<"); break;
|
||
|
|
case '&': PutStr("&"); break;
|
||
|
|
default: PutChar(c);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////
|
||
|
|
// boring tables follow
|
||
|
|
|
||
|
|
/* the following tables cover the whole set of HTML 4 (!) entities,
   as listed at https://www.w3schools.com/charsets/ref_html_entities_4.asp

   However, three small sections are excluded intentionally to reduce the
   search time, so &fnof;, &loz;, &spades;, &clubs;, &hearts; and &diams;
   are left aside; if you need them, remove the appropriate "#if 0"s
   below.

   Please note that HTML5 entities are not in the list; there are too
   many of them, and HTML5 itself is just another committee-made bastard
   which has no right to exist.
 */
|
||
|
|
|
||
|
|
static const char *html_entities_a0[96] = {
|
||
|
|
"nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
|
||
|
|
"uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
|
||
|
|
"deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
|
||
|
|
"cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
|
||
|
|
"Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
|
||
|
|
"Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
|
||
|
|
"ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
|
||
|
|
"Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
|
||
|
|
"agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
|
||
|
|
"egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
|
||
|
|
"eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
|
||
|
|
"oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
|
||
|
|
};
|
||
|
|
#if 0
|
||
|
|
static const char *html_entities_192[1] = {
|
||
|
|
"fnof"
|
||
|
|
};
|
||
|
|
#endif
|
||
|
|
static const char *html_entities_391[70] = {
|
||
|
|
"Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Zeta", "Eta", "Theta",
|
||
|
|
"Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi", "Omicron", "Pi",
|
||
|
|
"Rho", 0, "Sigma", "Tau", "Upsilon", "Phi", "Chi", "Psi",
|
||
|
|
"Omega", 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
"alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta",
|
||
|
|
"iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi",
|
||
|
|
"rho", "sigmaf", "sigma", "tau", "upsilon", "phi", "chi", "psi",
|
||
|
|
"omega", 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
"thetasym", "upsih", 0, 0, 0, "piv"
|
||
|
|
};
|
||
|
|
static const char *html_entities_2022[35] = {
|
||
|
|
"bull", 0, 0, 0, "hellip", 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
"prime", "Prime", 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, "oline", 0, 0, 0,
|
||
|
|
0, 0, "frasl"
|
||
|
|
};
|
||
|
|
static const char *html_entities_2111[255] = {
|
||
|
|
"image", 0, 0, 0, 0, 0, 0, "weierp",
|
||
|
|
0, 0, 0, "real", 0, 0, 0, 0,
|
||
|
|
0, "trade", 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, "alefsym", 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, "larr",
|
||
|
|
"uarr", "rarr", "darr", "harr", 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, "crarr", 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, "lArr",
|
||
|
|
"uArr", "rArr", "dArr", "hArr", 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, "forall",
|
||
|
|
0, "part", "exist", 0, "empty", 0, "nabla", "isin",
|
||
|
|
"notin", 0, "ni", 0, 0, 0, "prod"
|
||
|
|
};
|
||
|
|
static const char *html_entities_2211[282] = {
|
||
|
|
"sum", "minus", 0, 0, 0, 0, "lowast", 0,
|
||
|
|
0, "radic", 0, 0, "prop", "infin", 0, "ang",
|
||
|
|
0, 0, 0, 0, 0, 0, "and", "or",
|
||
|
|
"cap", "cup", "int", 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, "there4", 0, 0, 0, 0,
|
||
|
|
0, 0, 0, "sim", 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, "cong", 0, 0, "asymp",
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, "ne",
|
||
|
|
"equiv", 0, 0, "le", "ge", 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, "sub", "sup", "nsub", 0, "sube", "supe", 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, "oplus", 0, "otimes", 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, "perp", 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, "sdot", 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, "lceil",
|
||
|
|
"rceil", "lfloor", "rfloor", 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||
|
|
"lang", "rang"
|
||
|
|
};
|
||
|
|
#if 0
|
||
|
|
static const char *html_entities_25ca[1] = {
|
||
|
|
"loz"
|
||
|
|
};
|
||
|
|
static const char *html_entities_2660[7] = {
|
||
|
|
"spades", 0, 0, "clubs", 0, "hearts", "diams"
|
||
|
|
};
|
||
|
|
#endif
|
||
|
|
struct html_entity_section {
|
||
|
|
int base, len;
|
||
|
|
const char **names;
|
||
|
|
};
|
||
|
|
static struct html_entity_section html_entity_table[] = {
|
||
|
|
{ 0x000a0, 96, html_entities_a0 },
|
||
|
|
#if 0
|
||
|
|
{ 0x00192, 1, html_entities_192 },
|
||
|
|
#endif
|
||
|
|
{ 0x00391, 70, html_entities_391 },
|
||
|
|
{ 0x02022, 35, html_entities_2022 },
|
||
|
|
{ 0x02111, 255, html_entities_2111 },
|
||
|
|
{ 0x02211, 282, html_entities_2211 },
|
||
|
|
#if 0
|
||
|
|
{ 0x025ca, 1, html_entities_25ca },
|
||
|
|
{ 0x02660, 7, html_entities_2660 },
|
||
|
|
#endif
|
||
|
|
{ 0, 0, 0 }
|
||
|
|
};
|
||
|
|
|
||
|
|
// end of boring tables
|
||
|
|
///////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
static const char *get_html_entity(int code)
|
||
|
|
{
|
||
|
|
struct html_entity_section *p;
|
||
|
|
for(p = html_entity_table; p->names; p++)
|
||
|
|
if(code >= p->base && code < p->base + p->len)
|
||
|
|
return p->names[code - p->base];
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
void StreamFilterUtf8ToHtml::UnknownCode(int code)
|
||
|
|
{
|
||
|
|
const char *e = get_html_entity(code);
|
||
|
|
if(e) {
|
||
|
|
PutChar('&');
|
||
|
|
PutStr(e);
|
||
|
|
PutChar(';');
|
||
|
|
} else {
|
||
|
|
PutStr("&#x");
|
||
|
|
PutHex(code, 0, true);
|
||
|
|
PutChar(';');
|
||
|
|
}
|
||
|
|
}
|