diff --git a/lib/Parser.js b/lib/Parser.js index 3db01b4fc..4d9ef5cd2 100644 --- a/lib/Parser.js +++ b/lib/Parser.js @@ -139,6 +139,10 @@ Parser.prototype.ontext = function(data){ if(this._cbs.ontext) this._cbs.ontext(data); }; +Parser.prototype.oncontent = function(data){ + if(this._cbs.oncontent) this._cbs.oncontent(data); +}; + Parser.prototype.onopentagname = function(name){ if(this._lowerCaseTagNames){ name = name.toLowerCase(); @@ -286,6 +290,7 @@ Parser.prototype.oncdata = function(value){ if(this._options.xmlMode || this._options.recognizeCDATA){ if(this._cbs.oncdatastart) this._cbs.oncdatastart(); if(this._cbs.ontext) this._cbs.ontext(value); + if(this._cbs.oncontent) this._cbs.oncontent(value); if(this._cbs.oncdataend) this._cbs.oncdataend(); } else { this.oncomment("[CDATA[" + value + "]]"); diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js index ec013c127..cb24c75fc 100644 --- a/lib/Tokenizer.js +++ b/lib/Tokenizer.js @@ -134,6 +134,7 @@ function consumeSpecialNameChar(upper, NEXT_STATE){ function Tokenizer(options, cbs){ this._state = TEXT; this._buffer = ""; + this._content = ""; this._sectionStart = 0; this._index = 0; this._bufferOffset = 0; //chars removed from _buffer @@ -148,15 +149,11 @@ function Tokenizer(options, cbs){ Tokenizer.prototype._stateText = function(c){ if(c === "<"){ - if(this._index > this._sectionStart){ - this._cbs.ontext(this._getSection()); - } + this._flushText(); this._state = BEFORE_TAG_NAME; this._sectionStart = this._index; } else if(this._decodeEntities && this._special === SPECIAL_NONE && c === "&"){ - if(this._index > this._sectionStart){ - this._cbs.ontext(this._getSection()); - } + this._flushText(); this._baseState = TEXT; this._state = BEFORE_ENTITY; this._sectionStart = this._index; @@ -175,7 +172,7 @@ Tokenizer.prototype._stateBeforeTagName = function(c){ this._state = IN_PROCESSING_INSTRUCTION; this._sectionStart = this._index + 1; } else if(c === "<"){ - this._cbs.ontext(this._getSection()); + this._flushText(); this._sectionStart = this._index; } else { this._state = (!this._xmlMode && (c === "s" || c === "S")) ? @@ -186,6 +183,7 @@ Tokenizer.prototype._stateBeforeTagName = function(c){ Tokenizer.prototype._stateInTagName = function(c){ if(c === "/" || c === ">" || whitespace(c)){ + this._flushContent(); this._emitToken("onopentagname"); this._state = BEFORE_ATTRIBUTE_NAME; this._index--; @@ -211,6 +209,7 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function(c){ Tokenizer.prototype._stateInCloseingTagName = function(c){ if(c === ">" || whitespace(c)){ + this._flushContent(); this._emitToken("onclosetag"); this._state = AFTER_CLOSING_TAG_NAME; this._index--; @@ -602,9 +601,7 @@ Tokenizer.prototype._cleanup = function (){ this._bufferOffset += this._index; } else if(this._running){ if(this._state === TEXT){ - if(this._sectionStart !== this._index){ - this._cbs.ontext(this._buffer.substr(this._sectionStart)); - } + this._flushText(); this._buffer = ""; this._index = 0; this._bufferOffset += this._index; @@ -831,51 +828,49 @@ Tokenizer.prototype.end = function(chunk){ Tokenizer.prototype._finish = function(){ //if there is remaining data, emit it in a reasonable way - if(this._sectionStart < this._index){ - this._handleTrailingData(); - } + this._handleTrailingData(); this._cbs.onend(); }; Tokenizer.prototype._handleTrailingData = function(){ - var data = this._buffer.substr(this._sectionStart); - - if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){ - this._cbs.oncdata(data); - } else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){ - this._cbs.oncomment(data); - } else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){ - this._parseLegacyEntity(); - if(this._sectionStart < this._index){ + + if(this._sectionStart < this._index){ + var data = this._buffer.substr(this._sectionStart); + if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){ + this._cbs.oncdata(data); + } else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){ + this._cbs.oncomment(data); + } else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){ + this._parseLegacyEntity(); this._state = this._baseState; this._handleTrailingData(); - } - } else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){ - this._decodeNumericEntity(2, 10); - if(this._sectionStart < this._index){ + } else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){ + this._decodeNumericEntity(2, 10); this._state = this._baseState; this._handleTrailingData(); - } - } else if(this._state === IN_HEX_ENTITY && !this._xmlMode){ - this._decodeNumericEntity(3, 16); - if(this._sectionStart < this._index){ + } else if(this._state === IN_HEX_ENTITY && !this._xmlMode){ + this._decodeNumericEntity(3, 16); this._state = this._baseState; this._handleTrailingData(); + } else if( + this._state !== IN_TAG_NAME && + this._state !== BEFORE_ATTRIBUTE_NAME && + this._state !== BEFORE_ATTRIBUTE_VALUE && + this._state !== AFTER_ATTRIBUTE_NAME && + this._state !== IN_ATTRIBUTE_NAME && + this._state !== IN_ATTRIBUTE_VALUE_SQ && + this._state !== IN_ATTRIBUTE_VALUE_DQ && + this._state !== IN_ATTRIBUTE_VALUE_NQ && + this._state !== IN_CLOSING_TAG_NAME + ){ + this._flushText(); + this._flushContent(); } - } else if( - this._state !== IN_TAG_NAME && - this._state !== BEFORE_ATTRIBUTE_NAME && - this._state !== BEFORE_ATTRIBUTE_VALUE && - this._state !== AFTER_ATTRIBUTE_NAME && - this._state !== IN_ATTRIBUTE_NAME && - this._state !== IN_ATTRIBUTE_VALUE_SQ && - this._state !== IN_ATTRIBUTE_VALUE_DQ && - this._state !== IN_ATTRIBUTE_VALUE_NQ && - this._state !== IN_CLOSING_TAG_NAME - ){ - this._cbs.ontext(data); + } else if(this._state === TEXT){ + this._flushContent(); } + //else, ignore remaining data //TODO add a way to remove current tag }; @@ -902,5 +897,21 @@ Tokenizer.prototype._emitPartial = function(value){ this._cbs.onattribdata(value); //TODO implement the new event } else { this._cbs.ontext(value); + this._content += value; + } +}; + +Tokenizer.prototype._flushText = function(){ + if(this._index > this._sectionStart){ + var text = this._getSection(); + this._cbs.ontext(text); + this._content += text; + } +}; +Tokenizer.prototype._flushContent = function(){ + if(!this._content){ + return; } + this._cbs.oncontent(this._content); + this._content = ""; }; diff --git a/lib/index.js b/lib/index.js index 880f57e90..5dd44bfb0 100644 --- a/lib/index.js +++ b/lib/index.js @@ -56,6 +56,7 @@ module.exports = { cdatastart: 0, cdataend: 0, text: 1, + content: 1, processinginstruction: 2, comment: 1, commentend: 0, diff --git a/test/Events/01-simple.json b/test/Events/01-simple.json index ab3076ac5..4e753232c 100644 --- a/test/Events/01-simple.json +++ b/test/Events/01-simple.json @@ -34,6 +34,12 @@ "adsf" ] }, + { + "event": "content", + "data": [ + "adsf" + ] + }, { "event": "closetag", "data": [ diff --git a/test/Events/02-template.json b/test/Events/02-template.json index df344b6a2..abc4bcb45 100644 --- a/test/Events/02-template.json +++ b/test/Events/02-template.json @@ -47,6 +47,12 @@ "

Heading1

" ] }, + { + "event": "content", + "data": [ + "

Heading1

" + ] + }, { "event": "closetag", "data": [ diff --git a/test/Events/03-lowercase_tags.json b/test/Events/03-lowercase_tags.json index 9b58c5999..90373aeae 100644 --- a/test/Events/03-lowercase_tags.json +++ b/test/Events/03-lowercase_tags.json @@ -36,6 +36,12 @@ "adsf" ] }, + { + "event": "content", + "data": [ + "adsf" + ] + }, { "event": "closetag", "data": [ diff --git a/test/Events/04-cdata.json b/test/Events/04-cdata.json index 6032b6882..7ff133715 100644 --- a/test/Events/04-cdata.json +++ b/test/Events/04-cdata.json @@ -29,6 +29,12 @@ " asdf ><> fo" ] }, + { + "event": "content", + "data": [ + " asdf ><> fo" + ] + }, { "event": "cdataend", "data": [] diff --git a/test/Events/05-cdata-special.json b/test/Events/05-cdata-special.json index 686cb1a2f..6ffb10551 100644 --- a/test/Events/05-cdata-special.json +++ b/test/Events/05-cdata-special.json @@ -25,6 +25,12 @@ "/*<> fo/*]]>*/" ] }, + { + "event": "content", + "data": [ + "/*<> fo/*]]>*/" + ] + }, { "event": "closetag", "data": [ diff --git a/test/Events/06-leading-lt.json b/test/Events/06-leading-lt.json index fcec85289..1ad0d0a75 100644 --- a/test/Events/06-leading-lt.json +++ b/test/Events/06-leading-lt.json @@ -11,6 +11,12 @@ "data": [ ">a>" ] + }, + { + "event": "content", + "data": [ + ">a>" + ] } ] } \ No newline at end of file diff --git a/test/Events/07-self-closing.json b/test/Events/07-self-closing.json index 49ed93b85..6142b4370 100644 --- a/test/Events/07-self-closing.json +++ b/test/Events/07-self-closing.json @@ -10,58 +10,64 @@ }, "html": "Foo
", "expected": [ - { - "event": "opentagname", - "data": [ - "a" - ] - }, - { - "event": "attribute", - "data": [ - "href", - "http://test.com/" - ] - }, - { - "event": "opentag", - "data": [ - "a", - { - "href": "http://test.com/" - } - ] - }, - { - "event": "text", - "data": [ - "Foo" - ] - }, - { - "event": "closetag", - "data": [ - "a" - ] - }, - { - "event": "opentagname", - "data": [ - "hr" - ] - }, - { - "event": "opentag", - "data": [ - "hr", - {} - ] - }, - { - "event": "closetag", - "data": [ - "hr" - ] - } - ] + { + "event": "opentagname", + "data": [ + "a" + ] + }, + { + "event": "attribute", + "data": [ + "href", + "http://test.com/" + ] + }, + { + "event": "opentag", + "data": [ + "a", + { + "href": "http://test.com/" + } + ] + }, + { + "event": "text", + "data": [ + "Foo" + ] + }, + { + "event": "content", + "data": [ + "Foo" + ] + }, + { + "event": "closetag", + "data": [ + "a" + ] + }, + { + "event": "opentagname", + "data": [ + "hr" + ] + }, + { + "event": "opentag", + "data": [ + "hr", + {} + ] + }, + { + "event": "closetag", + "data": [ + "hr" + ] + } + ] } \ No newline at end of file diff --git a/test/Events/08-implicit-close-tags.json b/test/Events/08-implicit-close-tags.json index 5d5b3ee28..fa646eb64 100644 --- a/test/Events/08-implicit-close-tags.json +++ b/test/Events/08-implicit-close-tags.json @@ -4,64 +4,70 @@ "html": "
  1. Heading

    Div
    Div2
  2. Heading 2

Para

Heading 4

", "expected": [ { "event": "opentagname", "data": [ "ol" ] }, - { "event": "opentag", "data": [ "ol", {} ] }, + { "event": "opentag", "data": [ "ol", {} ] }, { "event": "opentagname", "data": [ "li" ] }, - { "event": "attribute", "data": [ "class", "test" ] }, - { "event": "opentag", "data": [ "li", { "class": "test" } ] }, + { "event": "attribute", "data": [ "class", "test" ] }, + { "event": "opentag", "data": [ "li", { "class": "test" } ] }, { "event": "opentagname", "data": [ "div" ] }, - { "event": "opentag", "data": [ "div", {} ] }, + { "event": "opentag", "data": [ "div", {} ] }, { "event": "opentagname", "data": [ "table" ] }, - { "event": "attribute", "data": [ "style", "width:100%" ] }, - { "event": "opentag", "data": [ "table", { "style": "width:100%" } ] }, + { "event": "attribute", "data": [ "style", "width:100%" ] }, + { "event": "opentag", "data": [ "table", { "style": "width:100%" } ] }, { "event": "opentagname", "data": [ "tr" ] }, - { "event": "opentag", "data": [ "tr", {} ] }, + { "event": "opentag", "data": [ "tr", {} ] }, { "event": "opentagname", "data": [ "td" ] }, - { "event": "attribute", "data": [ "colspan", "2" ] }, - { "event": "opentag", "data": [ "td", { "colspan": "2" } ] }, + { "event": "attribute", "data": [ "colspan", "2" ] }, + { "event": "opentag", "data": [ "td", { "colspan": "2" } ] }, { "event": "opentagname", "data": [ "h3" ] }, - { "event": "opentag", "data": [ "h3", {} ] }, - { "event": "text", "data": [ "Heading" ] }, - { "event": "closetag", "data": [ "h3" ] }, - { "event": "closetag", "data": [ "td" ] }, - { "event": "closetag", "data": [ "tr" ] }, + { "event": "opentag", "data": [ "h3", {} ] }, + { "event": "text", "data": [ "Heading" ] }, + { "event": "content", "data": [ "Heading" ] }, + { "event": "closetag", "data": [ "h3" ] }, + { "event": "closetag", "data": [ "td" ] }, + { "event": "closetag", "data": [ "tr" ] }, { "event": "opentagname", "data": [ "tr" ] }, - { "event": "opentag", "data": [ "tr", {} ] }, + { "event": "opentag", "data": [ "tr", {} ] }, { "event": "opentagname", "data": [ "td" ] }, - { "event": "opentag", "data": [ "td", {} ] }, + { "event": "opentag", "data": [ "td", {} ] }, { "event": "opentagname", "data": [ "div" ] }, - { "event": "opentag", "data": [ "div", {} ] }, - { "event": "text", "data": [ "Div" ] }, - { "event": "closetag", "data": [ "div" ] }, - { "event": "closetag", "data": [ "td" ] }, + { "event": "opentag", "data": [ "div", {} ] }, + { "event": "text", "data": [ "Div" ] }, + { "event": "content", "data": [ "Div" ] }, + { "event": "closetag", "data": [ "div" ] }, + { "event": "closetag", "data": [ "td" ] }, { "event": "opentagname", "data": [ "td" ] }, - { "event": "opentag", "data": [ "td", {} ] }, + { "event": "opentag", "data": [ "td", {} ] }, { "event": "opentagname", "data": [ "div" ] }, - { "event": "opentag", "data": [ "div", {} ] }, - { "event": "text", "data": [ "Div2" ] }, - { "event": "closetag", "data": [ "div" ] }, - { "event": "closetag", "data": [ "td" ] }, - { "event": "closetag", "data": [ "tr" ] }, - { "event": "closetag", "data": [ "table" ] }, - { "event": "closetag", "data": [ "div" ] }, - { "event": "closetag", "data": [ "li" ] }, + { "event": "opentag", "data": [ "div", {} ] }, + { "event": "text", "data": [ "Div2" ] }, + { "event": "content", "data": [ "Div2" ] }, + { "event": "closetag", "data": [ "div" ] }, + { "event": "closetag", "data": [ "td" ] }, + { "event": "closetag", "data": [ "tr" ] }, + { "event": "closetag", "data": [ "table" ] }, + { "event": "closetag", "data": [ "div" ] }, + { "event": "closetag", "data": [ "li" ] }, { "event": "opentagname", "data": [ "li" ] }, - { "event": "opentag", "data": [ "li", {} ] }, + { "event": "opentag", "data": [ "li", {} ] }, { "event": "opentagname", "data": [ "div" ] }, - { "event": "opentag", "data": [ "div", {} ] }, + { "event": "opentag", "data": [ "div", {} ] }, { "event": "opentagname", "data": [ "h3" ] }, - { "event": "opentag", "data": [ "h3", {} ] }, - { "event": "text", "data": [ "Heading 2" ] }, - { "event": "closetag", "data": [ "h3" ] }, - { "event": "closetag", "data": [ "div" ] }, - { "event": "closetag", "data": [ "li" ] }, - { "event": "closetag", "data": [ "ol" ] }, + { "event": "opentag", "data": [ "h3", {} ] }, + { "event": "text", "data": [ "Heading 2" ] }, + { "event": "content", "data": [ "Heading 2" ] }, + { "event": "closetag", "data": [ "h3" ] }, + { "event": "closetag", "data": [ "div" ] }, + { "event": "closetag", "data": [ "li" ] }, + { "event": "closetag", "data": [ "ol" ] }, { "event": "opentagname", "data": [ "p" ] }, - { "event": "opentag", "data": [ "p", {} ] }, - { "event": "text", "data": [ "Para" ] }, - { "event": "closetag", "data": [ "p" ] }, + { "event": "opentag", "data": [ "p", {} ] }, + { "event": "text", "data": [ "Para" ] }, + { "event": "content", "data": [ "Para" ] }, + { "event": "closetag", "data": [ "p" ] }, { "event": "opentagname", "data": [ "h4" ] }, - { "event": "opentag", "data": [ "h4", {} ] }, - { "event": "text", "data": [ "Heading 4" ] }, - { "event": "closetag", "data": [ "h4" ] } + { "event": "opentag", "data": [ "h4", {} ] }, + { "event": "text", "data": [ "Heading 4" ] }, + { "event": "content", "data": [ "Heading 4" ] }, + { "event": "closetag", "data": [ "h4" ] } ] } \ No newline at end of file diff --git a/test/Events/09-attributes.json b/test/Events/09-attributes.json index afa6e4a96..3243502ff 100644 --- a/test/Events/09-attributes.json +++ b/test/Events/09-attributes.json @@ -58,6 +58,12 @@ "adsf" ] }, + { + "event": "content", + "data": [ + "adsf" + ] + }, { "event": "closetag", "data": [ diff --git a/test/Events/10-crazy-attrib.json b/test/Events/10-crazy-attrib.json index 00bad5f79..fd4a10188 100644 --- a/test/Events/10-crazy-attrib.json +++ b/test/Events/10-crazy-attrib.json @@ -42,6 +42,12 @@ "stuff" ] }, + { + "event": "content", + "data": [ + "stuff" + ] + }, { "event": "closetag", "data": [ diff --git a/test/Events/11-script_in_script.json b/test/Events/11-script_in_script.json index ddbb87c87..c59145673 100644 --- a/test/Events/11-script_in_script.json +++ b/test/Events/11-script_in_script.json @@ -38,6 +38,12 @@ "var str = '