diff --git a/lib/Parser.js b/lib/Parser.js
index 3db01b4fc..4d9ef5cd2 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -139,6 +139,10 @@ Parser.prototype.ontext = function(data){
if(this._cbs.ontext) this._cbs.ontext(data);
};
+Parser.prototype.oncontent = function(data){ //forwards `data` to the user-supplied oncontent callback
+ if(this._cbs.oncontent) this._cbs.oncontent(data); //the callback is optional: do nothing when none is registered
+};
+
Parser.prototype.onopentagname = function(name){
if(this._lowerCaseTagNames){
name = name.toLowerCase();
@@ -286,6 +290,7 @@ Parser.prototype.oncdata = function(value){
if(this._options.xmlMode || this._options.recognizeCDATA){
if(this._cbs.oncdatastart) this._cbs.oncdatastart();
if(this._cbs.ontext) this._cbs.ontext(value);
+ if(this._cbs.oncontent) this._cbs.oncontent(value);
if(this._cbs.oncdataend) this._cbs.oncdataend();
} else {
this.oncomment("[CDATA[" + value + "]]");
diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index ec013c127..cb24c75fc 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -134,6 +134,7 @@ function consumeSpecialNameChar(upper, NEXT_STATE){
function Tokenizer(options, cbs){
this._state = TEXT;
this._buffer = "";
+ this._content = "";
this._sectionStart = 0;
this._index = 0;
this._bufferOffset = 0; //chars removed from _buffer
@@ -148,15 +149,11 @@ function Tokenizer(options, cbs){
Tokenizer.prototype._stateText = function(c){
if(c === "<"){
- if(this._index > this._sectionStart){
- this._cbs.ontext(this._getSection());
- }
+ this._flushText();
this._state = BEFORE_TAG_NAME;
this._sectionStart = this._index;
} else if(this._decodeEntities && this._special === SPECIAL_NONE && c === "&"){
- if(this._index > this._sectionStart){
- this._cbs.ontext(this._getSection());
- }
+ this._flushText();
this._baseState = TEXT;
this._state = BEFORE_ENTITY;
this._sectionStart = this._index;
@@ -175,7 +172,7 @@ Tokenizer.prototype._stateBeforeTagName = function(c){
this._state = IN_PROCESSING_INSTRUCTION;
this._sectionStart = this._index + 1;
} else if(c === "<"){
- this._cbs.ontext(this._getSection());
+ this._flushText();
this._sectionStart = this._index;
} else {
this._state = (!this._xmlMode && (c === "s" || c === "S")) ?
@@ -186,6 +183,7 @@ Tokenizer.prototype._stateBeforeTagName = function(c){
Tokenizer.prototype._stateInTagName = function(c){
if(c === "/" || c === ">" || whitespace(c)){
+ this._flushContent();
this._emitToken("onopentagname");
this._state = BEFORE_ATTRIBUTE_NAME;
this._index--;
@@ -211,6 +209,7 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function(c){
Tokenizer.prototype._stateInCloseingTagName = function(c){
if(c === ">" || whitespace(c)){
+ this._flushContent();
this._emitToken("onclosetag");
this._state = AFTER_CLOSING_TAG_NAME;
this._index--;
@@ -602,9 +601,7 @@ Tokenizer.prototype._cleanup = function (){
this._bufferOffset += this._index;
} else if(this._running){
if(this._state === TEXT){
- if(this._sectionStart !== this._index){
- this._cbs.ontext(this._buffer.substr(this._sectionStart));
- }
+ this._flushText();
this._buffer = "";
this._index = 0;
this._bufferOffset += this._index;
@@ -831,51 +828,49 @@ Tokenizer.prototype.end = function(chunk){
Tokenizer.prototype._finish = function(){
//if there is remaining data, emit it in a reasonable way
- if(this._sectionStart < this._index){
- this._handleTrailingData();
- }
+ this._handleTrailingData(); //now called unconditionally: the pending-section check lives inside _handleTrailingData, which can also flush content when no section is pending
this._cbs.onend();
};
Tokenizer.prototype._handleTrailingData = function(){
- var data = this._buffer.substr(this._sectionStart);
-
- if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
- this._cbs.oncdata(data);
- } else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
- this._cbs.oncomment(data);
- } else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){
- this._parseLegacyEntity();
- if(this._sectionStart < this._index){
+
+ if(this._sectionStart < this._index){ //an unconsumed section remains in the buffer
+ var data = this._buffer.substr(this._sectionStart); //everything from the section start to the end of the buffer
+ if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){ //still inside a CDATA section: report the partial data
+ this._cbs.oncdata(data);
+ } else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){ //still inside a comment: report the partial data
+ this._cbs.oncomment(data);
+ } else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){ //still inside a named entity (non-XML mode only): hand it to _parseLegacyEntity, then re-run in the base state
+ this._parseLegacyEntity();
this._state = this._baseState;
this._handleTrailingData();
- }
- } else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){
- this._decodeNumericEntity(2, 10);
- if(this._sectionStart < this._index){
+ } else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){ //same pattern for a numeric entity
+ this._decodeNumericEntity(2, 10);
this._state = this._baseState;
this._handleTrailingData();
- }
- } else if(this._state === IN_HEX_ENTITY && !this._xmlMode){
- this._decodeNumericEntity(3, 16);
- if(this._sectionStart < this._index){
+ } else if(this._state === IN_HEX_ENTITY && !this._xmlMode){ //same pattern for a hex entity
+ this._decodeNumericEntity(3, 16);
this._state = this._baseState;
this._handleTrailingData();
+ } else if( //any remaining state except the tag-name / attribute states listed below
+ this._state !== IN_TAG_NAME &&
+ this._state !== BEFORE_ATTRIBUTE_NAME &&
+ this._state !== BEFORE_ATTRIBUTE_VALUE &&
+ this._state !== AFTER_ATTRIBUTE_NAME &&
+ this._state !== IN_ATTRIBUTE_NAME &&
+ this._state !== IN_ATTRIBUTE_VALUE_SQ &&
+ this._state !== IN_ATTRIBUTE_VALUE_DQ &&
+ this._state !== IN_ATTRIBUTE_VALUE_NQ &&
+ this._state !== IN_CLOSING_TAG_NAME
+ ){
+ this._flushText(); //emit the pending section through ontext (this also appends it to _content)
+ this._flushContent(); //then emit the accumulated content through oncontent
}
- } else if(
- this._state !== IN_TAG_NAME &&
- this._state !== BEFORE_ATTRIBUTE_NAME &&
- this._state !== BEFORE_ATTRIBUTE_VALUE &&
- this._state !== AFTER_ATTRIBUTE_NAME &&
- this._state !== IN_ATTRIBUTE_NAME &&
- this._state !== IN_ATTRIBUTE_VALUE_SQ &&
- this._state !== IN_ATTRIBUTE_VALUE_DQ &&
- this._state !== IN_ATTRIBUTE_VALUE_NQ &&
- this._state !== IN_CLOSING_TAG_NAME
- ){
- this._cbs.ontext(data);
+ } else if(this._state === TEXT){ //no pending section, but previously emitted text may still be waiting in _content
+ this._flushContent(); //no-op when _content is empty
}
+
//else, ignore remaining data
//TODO add a way to remove current tag
};
@@ -902,5 +897,21 @@ Tokenizer.prototype._emitPartial = function(value){
this._cbs.onattribdata(value); //TODO implement the new event
} else {
this._cbs.ontext(value);
+ this._content += value;
+ }
+};
+
+Tokenizer.prototype._flushText = function(){ //emits the pending section via ontext and appends it to _content
+ if(this._index > this._sectionStart){ //skip when the current section is empty
+ var text = this._getSection(); //presumably the buffer slice between _sectionStart and _index -- confirm against _getSection
+ this._cbs.ontext(text);
+ this._content += text; //accumulate so _flushContent can later emit all pieces as one oncontent event
+ }
+};
+Tokenizer.prototype._flushContent = function(){ //emits the accumulated _content as a single oncontent event, then resets it
+ if(!this._content){ //nothing accumulated: emit no event
+ return;
}
+ this._cbs.oncontent(this._content); //called unconditionally: the callback object is expected to implement oncontent
+ this._content = ""; //start a fresh accumulation for the next run of text
};
diff --git a/lib/index.js b/lib/index.js
index 880f57e90..5dd44bfb0 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -56,6 +56,7 @@ module.exports = {
cdatastart: 0,
cdataend: 0,
text: 1,
+ content: 1,
processinginstruction: 2,
comment: 1,
commentend: 0,
diff --git a/test/Events/01-simple.json b/test/Events/01-simple.json
index ab3076ac5..4e753232c 100644
--- a/test/Events/01-simple.json
+++ b/test/Events/01-simple.json
@@ -34,6 +34,12 @@
"adsf"
]
},
+ {
+ "event": "content",
+ "data": [
+ "adsf"
+ ]
+ },
{
"event": "closetag",
"data": [
diff --git a/test/Events/02-template.json b/test/Events/02-template.json
index df344b6a2..abc4bcb45 100644
--- a/test/Events/02-template.json
+++ b/test/Events/02-template.json
@@ -47,6 +47,12 @@
"
Heading1
"
]
},
+ {
+ "event": "content",
+ "data": [
+ "Heading1
"
+ ]
+ },
{
"event": "closetag",
"data": [
diff --git a/test/Events/03-lowercase_tags.json b/test/Events/03-lowercase_tags.json
index 9b58c5999..90373aeae 100644
--- a/test/Events/03-lowercase_tags.json
+++ b/test/Events/03-lowercase_tags.json
@@ -36,6 +36,12 @@
"adsf"
]
},
+ {
+ "event": "content",
+ "data": [
+ "adsf"
+ ]
+ },
{
"event": "closetag",
"data": [
diff --git a/test/Events/04-cdata.json b/test/Events/04-cdata.json
index 6032b6882..7ff133715 100644
--- a/test/Events/04-cdata.json
+++ b/test/Events/04-cdata.json
@@ -29,6 +29,12 @@
" asdf ><> fo"
]
},
+ {
+ "event": "content",
+ "data": [
+ " asdf ><> fo"
+ ]
+ },
{
"event": "cdataend",
"data": []
diff --git a/test/Events/05-cdata-special.json b/test/Events/05-cdata-special.json
index 686cb1a2f..6ffb10551 100644
--- a/test/Events/05-cdata-special.json
+++ b/test/Events/05-cdata-special.json
@@ -25,6 +25,12 @@
"/*<> fo/*]]>*/"
]
},
+ {
+ "event": "content",
+ "data": [
+ "/*<> fo/*]]>*/"
+ ]
+ },
{
"event": "closetag",
"data": [
diff --git a/test/Events/06-leading-lt.json b/test/Events/06-leading-lt.json
index fcec85289..1ad0d0a75 100644
--- a/test/Events/06-leading-lt.json
+++ b/test/Events/06-leading-lt.json
@@ -11,6 +11,12 @@
"data": [
">a>"
]
+ },
+ {
+ "event": "content",
+ "data": [
+ ">a>"
+ ]
}
]
}
\ No newline at end of file
diff --git a/test/Events/07-self-closing.json b/test/Events/07-self-closing.json
index 49ed93b85..6142b4370 100644
--- a/test/Events/07-self-closing.json
+++ b/test/Events/07-self-closing.json
@@ -10,58 +10,64 @@
},
"html": "Foo
",
"expected": [
- {
- "event": "opentagname",
- "data": [
- "a"
- ]
- },
- {
- "event": "attribute",
- "data": [
- "href",
- "http://test.com/"
- ]
- },
- {
- "event": "opentag",
- "data": [
- "a",
- {
- "href": "http://test.com/"
- }
- ]
- },
- {
- "event": "text",
- "data": [
- "Foo"
- ]
- },
- {
- "event": "closetag",
- "data": [
- "a"
- ]
- },
- {
- "event": "opentagname",
- "data": [
- "hr"
- ]
- },
- {
- "event": "opentag",
- "data": [
- "hr",
- {}
- ]
- },
- {
- "event": "closetag",
- "data": [
- "hr"
- ]
- }
- ]
+ {
+ "event": "opentagname",
+ "data": [
+ "a"
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "href",
+ "http://test.com/"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "a",
+ {
+ "href": "http://test.com/"
+ }
+ ]
+ },
+ {
+ "event": "text",
+ "data": [
+ "Foo"
+ ]
+ },
+ {
+ "event": "content",
+ "data": [
+ "Foo"
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "a"
+ ]
+ },
+ {
+ "event": "opentagname",
+ "data": [
+ "hr"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "hr",
+ {}
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "hr"
+ ]
+ }
+ ]
}
\ No newline at end of file
diff --git a/test/Events/08-implicit-close-tags.json b/test/Events/08-implicit-close-tags.json
index 5d5b3ee28..fa646eb64 100644
--- a/test/Events/08-implicit-close-tags.json
+++ b/test/Events/08-implicit-close-tags.json
@@ -4,64 +4,70 @@
"html": "Heading 2
Para
Heading 4
",
"expected": [
{ "event": "opentagname", "data": [ "ol" ] },
- { "event": "opentag", "data": [ "ol", {} ] },
+ { "event": "opentag", "data": [ "ol", {} ] },
{ "event": "opentagname", "data": [ "li" ] },
- { "event": "attribute", "data": [ "class", "test" ] },
- { "event": "opentag", "data": [ "li", { "class": "test" } ] },
+ { "event": "attribute", "data": [ "class", "test" ] },
+ { "event": "opentag", "data": [ "li", { "class": "test" } ] },
{ "event": "opentagname", "data": [ "div" ] },
- { "event": "opentag", "data": [ "div", {} ] },
+ { "event": "opentag", "data": [ "div", {} ] },
{ "event": "opentagname", "data": [ "table" ] },
- { "event": "attribute", "data": [ "style", "width:100%" ] },
- { "event": "opentag", "data": [ "table", { "style": "width:100%" } ] },
+ { "event": "attribute", "data": [ "style", "width:100%" ] },
+ { "event": "opentag", "data": [ "table", { "style": "width:100%" } ] },
{ "event": "opentagname", "data": [ "tr" ] },
- { "event": "opentag", "data": [ "tr", {} ] },
+ { "event": "opentag", "data": [ "tr", {} ] },
{ "event": "opentagname", "data": [ "td" ] },
- { "event": "attribute", "data": [ "colspan", "2" ] },
- { "event": "opentag", "data": [ "td", { "colspan": "2" } ] },
+ { "event": "attribute", "data": [ "colspan", "2" ] },
+ { "event": "opentag", "data": [ "td", { "colspan": "2" } ] },
{ "event": "opentagname", "data": [ "h3" ] },
- { "event": "opentag", "data": [ "h3", {} ] },
- { "event": "text", "data": [ "Heading" ] },
- { "event": "closetag", "data": [ "h3" ] },
- { "event": "closetag", "data": [ "td" ] },
- { "event": "closetag", "data": [ "tr" ] },
+ { "event": "opentag", "data": [ "h3", {} ] },
+ { "event": "text", "data": [ "Heading" ] },
+ { "event": "content", "data": [ "Heading" ] },
+ { "event": "closetag", "data": [ "h3" ] },
+ { "event": "closetag", "data": [ "td" ] },
+ { "event": "closetag", "data": [ "tr" ] },
{ "event": "opentagname", "data": [ "tr" ] },
- { "event": "opentag", "data": [ "tr", {} ] },
+ { "event": "opentag", "data": [ "tr", {} ] },
{ "event": "opentagname", "data": [ "td" ] },
- { "event": "opentag", "data": [ "td", {} ] },
+ { "event": "opentag", "data": [ "td", {} ] },
{ "event": "opentagname", "data": [ "div" ] },
- { "event": "opentag", "data": [ "div", {} ] },
- { "event": "text", "data": [ "Div" ] },
- { "event": "closetag", "data": [ "div" ] },
- { "event": "closetag", "data": [ "td" ] },
+ { "event": "opentag", "data": [ "div", {} ] },
+ { "event": "text", "data": [ "Div" ] },
+ { "event": "content", "data": [ "Div" ] },
+ { "event": "closetag", "data": [ "div" ] },
+ { "event": "closetag", "data": [ "td" ] },
{ "event": "opentagname", "data": [ "td" ] },
- { "event": "opentag", "data": [ "td", {} ] },
+ { "event": "opentag", "data": [ "td", {} ] },
{ "event": "opentagname", "data": [ "div" ] },
- { "event": "opentag", "data": [ "div", {} ] },
- { "event": "text", "data": [ "Div2" ] },
- { "event": "closetag", "data": [ "div" ] },
- { "event": "closetag", "data": [ "td" ] },
- { "event": "closetag", "data": [ "tr" ] },
- { "event": "closetag", "data": [ "table" ] },
- { "event": "closetag", "data": [ "div" ] },
- { "event": "closetag", "data": [ "li" ] },
+ { "event": "opentag", "data": [ "div", {} ] },
+ { "event": "text", "data": [ "Div2" ] },
+ { "event": "content", "data": [ "Div2" ] },
+ { "event": "closetag", "data": [ "div" ] },
+ { "event": "closetag", "data": [ "td" ] },
+ { "event": "closetag", "data": [ "tr" ] },
+ { "event": "closetag", "data": [ "table" ] },
+ { "event": "closetag", "data": [ "div" ] },
+ { "event": "closetag", "data": [ "li" ] },
{ "event": "opentagname", "data": [ "li" ] },
- { "event": "opentag", "data": [ "li", {} ] },
+ { "event": "opentag", "data": [ "li", {} ] },
{ "event": "opentagname", "data": [ "div" ] },
- { "event": "opentag", "data": [ "div", {} ] },
+ { "event": "opentag", "data": [ "div", {} ] },
{ "event": "opentagname", "data": [ "h3" ] },
- { "event": "opentag", "data": [ "h3", {} ] },
- { "event": "text", "data": [ "Heading 2" ] },
- { "event": "closetag", "data": [ "h3" ] },
- { "event": "closetag", "data": [ "div" ] },
- { "event": "closetag", "data": [ "li" ] },
- { "event": "closetag", "data": [ "ol" ] },
+ { "event": "opentag", "data": [ "h3", {} ] },
+ { "event": "text", "data": [ "Heading 2" ] },
+ { "event": "content", "data": [ "Heading 2" ] },
+ { "event": "closetag", "data": [ "h3" ] },
+ { "event": "closetag", "data": [ "div" ] },
+ { "event": "closetag", "data": [ "li" ] },
+ { "event": "closetag", "data": [ "ol" ] },
{ "event": "opentagname", "data": [ "p" ] },
- { "event": "opentag", "data": [ "p", {} ] },
- { "event": "text", "data": [ "Para" ] },
- { "event": "closetag", "data": [ "p" ] },
+ { "event": "opentag", "data": [ "p", {} ] },
+ { "event": "text", "data": [ "Para" ] },
+ { "event": "content", "data": [ "Para" ] },
+ { "event": "closetag", "data": [ "p" ] },
{ "event": "opentagname", "data": [ "h4" ] },
- { "event": "opentag", "data": [ "h4", {} ] },
- { "event": "text", "data": [ "Heading 4" ] },
- { "event": "closetag", "data": [ "h4" ] }
+ { "event": "opentag", "data": [ "h4", {} ] },
+ { "event": "text", "data": [ "Heading 4" ] },
+ { "event": "content", "data": [ "Heading 4" ] },
+ { "event": "closetag", "data": [ "h4" ] }
]
}
\ No newline at end of file
diff --git a/test/Events/09-attributes.json b/test/Events/09-attributes.json
index afa6e4a96..3243502ff 100644
--- a/test/Events/09-attributes.json
+++ b/test/Events/09-attributes.json
@@ -58,6 +58,12 @@
"adsf"
]
},
+ {
+ "event": "content",
+ "data": [
+ "adsf"
+ ]
+ },
{
"event": "closetag",
"data": [
diff --git a/test/Events/10-crazy-attrib.json b/test/Events/10-crazy-attrib.json
index 00bad5f79..fd4a10188 100644
--- a/test/Events/10-crazy-attrib.json
+++ b/test/Events/10-crazy-attrib.json
@@ -42,6 +42,12 @@
"stuff"
]
},
+ {
+ "event": "content",
+ "data": [
+ "stuff"
+ ]
+ },
{
"event": "closetag",
"data": [
diff --git a/test/Events/11-script_in_script.json b/test/Events/11-script_in_script.json
index ddbb87c87..c59145673 100644
--- a/test/Events/11-script_in_script.json
+++ b/test/Events/11-script_in_script.json
@@ -38,6 +38,12 @@
"var str = '