Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lib/Parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ Parser.prototype.ontext = function(data){
if(this._cbs.ontext) this._cbs.ontext(data);
};

// Relays a `content` payload to the user-supplied `oncontent` callback.
// `data` is passed through untouched; when no such callback is registered
// the call is a no-op.
Parser.prototype.oncontent = function(data){
	var callbacks = this._cbs;
	if(!callbacks.oncontent) return;
	// Invoke as a method of the callbacks object so `this` inside the handler
	// stays bound to it.
	callbacks.oncontent(data);
};

Parser.prototype.onopentagname = function(name){
if(this._lowerCaseTagNames){
name = name.toLowerCase();
Expand Down Expand Up @@ -286,6 +290,7 @@ Parser.prototype.oncdata = function(value){
if(this._options.xmlMode || this._options.recognizeCDATA){
if(this._cbs.oncdatastart) this._cbs.oncdatastart();
if(this._cbs.ontext) this._cbs.ontext(value);
if(this._cbs.oncontent) this._cbs.oncontent(value);
if(this._cbs.oncdataend) this._cbs.oncdataend();
} else {
this.oncomment("[CDATA[" + value + "]]");
Expand Down
95 changes: 53 additions & 42 deletions lib/Tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ function consumeSpecialNameChar(upper, NEXT_STATE){
function Tokenizer(options, cbs){
this._state = TEXT;
this._buffer = "";
this._content = "";
this._sectionStart = 0;
this._index = 0;
this._bufferOffset = 0; //chars removed from _buffer
Expand All @@ -148,15 +149,11 @@ function Tokenizer(options, cbs){

Tokenizer.prototype._stateText = function(c){
if(c === "<"){
if(this._index > this._sectionStart){
this._cbs.ontext(this._getSection());
}
this._flushText();
this._state = BEFORE_TAG_NAME;
this._sectionStart = this._index;
} else if(this._decodeEntities && this._special === SPECIAL_NONE && c === "&"){
if(this._index > this._sectionStart){
this._cbs.ontext(this._getSection());
}
this._flushText();
this._baseState = TEXT;
this._state = BEFORE_ENTITY;
this._sectionStart = this._index;
Expand All @@ -175,7 +172,7 @@ Tokenizer.prototype._stateBeforeTagName = function(c){
this._state = IN_PROCESSING_INSTRUCTION;
this._sectionStart = this._index + 1;
} else if(c === "<"){
this._cbs.ontext(this._getSection());
this._flushText();
this._sectionStart = this._index;
} else {
this._state = (!this._xmlMode && (c === "s" || c === "S")) ?
Expand All @@ -186,6 +183,7 @@ Tokenizer.prototype._stateBeforeTagName = function(c){

Tokenizer.prototype._stateInTagName = function(c){
if(c === "/" || c === ">" || whitespace(c)){
this._flushContent();
this._emitToken("onopentagname");
this._state = BEFORE_ATTRIBUTE_NAME;
this._index--;
Expand All @@ -211,6 +209,7 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function(c){

Tokenizer.prototype._stateInCloseingTagName = function(c){
if(c === ">" || whitespace(c)){
this._flushContent();
this._emitToken("onclosetag");
this._state = AFTER_CLOSING_TAG_NAME;
this._index--;
Expand Down Expand Up @@ -602,9 +601,7 @@ Tokenizer.prototype._cleanup = function (){
this._bufferOffset += this._index;
} else if(this._running){
if(this._state === TEXT){
if(this._sectionStart !== this._index){
this._cbs.ontext(this._buffer.substr(this._sectionStart));
}
this._flushText();
this._buffer = "";
this._index = 0;
this._bufferOffset += this._index;
Expand Down Expand Up @@ -831,51 +828,49 @@ Tokenizer.prototype.end = function(chunk){

// Wraps up tokenizing: deals with whatever is still pending, then signals
// the end of the document through the `onend` callback.
Tokenizer.prototype._finish = function(){
	//if there is remaining data, emit it in a reasonable way
	// Delegate exactly once and unconditionally. _handleTrailingData checks
	// `_sectionStart < _index` itself, and it must still run when no section
	// is pending so that buffered content can be flushed. Calling it a second
	// time would process the same trailing section twice.
	this._handleTrailingData();
	this._cbs.onend();
};

Tokenizer.prototype._handleTrailingData = function(){
var data = this._buffer.substr(this._sectionStart);

if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
this._cbs.oncdata(data);
} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
this._cbs.oncomment(data);
} else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){
this._parseLegacyEntity();
if(this._sectionStart < this._index){

if(this._sectionStart < this._index){
var data = this._buffer.substr(this._sectionStart);
if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
this._cbs.oncdata(data);
} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
this._cbs.oncomment(data);
} else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){
this._parseLegacyEntity();
this._state = this._baseState;
this._handleTrailingData();
}
} else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){
this._decodeNumericEntity(2, 10);
if(this._sectionStart < this._index){
} else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){
this._decodeNumericEntity(2, 10);
this._state = this._baseState;
this._handleTrailingData();
}
} else if(this._state === IN_HEX_ENTITY && !this._xmlMode){
this._decodeNumericEntity(3, 16);
if(this._sectionStart < this._index){
} else if(this._state === IN_HEX_ENTITY && !this._xmlMode){
this._decodeNumericEntity(3, 16);
this._state = this._baseState;
this._handleTrailingData();
} else if(
this._state !== IN_TAG_NAME &&
this._state !== BEFORE_ATTRIBUTE_NAME &&
this._state !== BEFORE_ATTRIBUTE_VALUE &&
this._state !== AFTER_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_VALUE_SQ &&
this._state !== IN_ATTRIBUTE_VALUE_DQ &&
this._state !== IN_ATTRIBUTE_VALUE_NQ &&
this._state !== IN_CLOSING_TAG_NAME
){
this._flushText();
this._flushContent();
}
} else if(
this._state !== IN_TAG_NAME &&
this._state !== BEFORE_ATTRIBUTE_NAME &&
this._state !== BEFORE_ATTRIBUTE_VALUE &&
this._state !== AFTER_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_VALUE_SQ &&
this._state !== IN_ATTRIBUTE_VALUE_DQ &&
this._state !== IN_ATTRIBUTE_VALUE_NQ &&
this._state !== IN_CLOSING_TAG_NAME
){
this._cbs.ontext(data);
} else if(this._state === TEXT){
this._flushContent();
}

//else, ignore remaining data
//TODO add a way to remove current tag
};
Expand All @@ -902,5 +897,21 @@ Tokenizer.prototype._emitPartial = function(value){
this._cbs.onattribdata(value); //TODO implement the new event
} else {
this._cbs.ontext(value);
this._content += value;
}
};

// Emits the pending section as a `text` event and appends the same string to
// the `_content` accumulator. Does nothing unless the cursor (`_index`) has
// moved past `_sectionStart`.
Tokenizer.prototype._flushText = function(){
	var hasPendingSection = this._index > this._sectionStart;
	if(!hasPendingSection) return;

	var section = this._getSection();
	// Notify first, accumulate second (same order as the events are observed).
	this._cbs.ontext(section);
	this._content = this._content + section;
};
// Hands the accumulated `_content` string to the `oncontent` callback and
// then resets the accumulator to "". A falsy (empty) accumulator means there
// is nothing to report, so no event is emitted.
Tokenizer.prototype._flushContent = function(){
	var pending = this._content;
	if(pending){
		this._cbs.oncontent(pending);
		this._content = "";
	}
};
1 change: 1 addition & 0 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ module.exports = {
cdatastart: 0,
cdataend: 0,
text: 1,
content: 1,
processinginstruction: 2,
comment: 1,
commentend: 0,
Expand Down
6 changes: 6 additions & 0 deletions test/Events/01-simple.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@
"adsf"
]
},
{
"event": "content",
"data": [
"adsf"
]
},
{
"event": "closetag",
"data": [
Expand Down
6 changes: 6 additions & 0 deletions test/Events/02-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@
"<h1>Heading1</h1>"
]
},
{
"event": "content",
"data": [
"<h1>Heading1</h1>"
]
},
{
"event": "closetag",
"data": [
Expand Down
6 changes: 6 additions & 0 deletions test/Events/03-lowercase_tags.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@
"adsf"
]
},
{
"event": "content",
"data": [
"adsf"
]
},
{
"event": "closetag",
"data": [
Expand Down
6 changes: 6 additions & 0 deletions test/Events/04-cdata.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
" asdf ><asdf></adsf><> fo"
]
},
{
"event": "content",
"data": [
" asdf ><asdf></adsf><> fo"
]
},
{
"event": "cdataend",
"data": []
Expand Down
6 changes: 6 additions & 0 deletions test/Events/05-cdata-special.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
"/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/"
]
},
{
"event": "content",
"data": [
"/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/"
]
},
{
"event": "closetag",
"data": [
Expand Down
6 changes: 6 additions & 0 deletions test/Events/06-leading-lt.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
"data": [
">a>"
]
},
{
"event": "content",
"data": [
">a>"
]
}
]
}
114 changes: 60 additions & 54 deletions test/Events/07-self-closing.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,58 +10,64 @@
},
"html": "<a href=http://test.com/>Foo</a><hr / >",
"expected": [
{
"event": "opentagname",
"data": [
"a"
]
},
{
"event": "attribute",
"data": [
"href",
"http://test.com/"
]
},
{
"event": "opentag",
"data": [
"a",
{
"href": "http://test.com/"
}
]
},
{
"event": "text",
"data": [
"Foo"
]
},
{
"event": "closetag",
"data": [
"a"
]
},
{
"event": "opentagname",
"data": [
"hr"
]
},
{
"event": "opentag",
"data": [
"hr",
{}
]
},
{
"event": "closetag",
"data": [
"hr"
]
}
]
{
"event": "opentagname",
"data": [
"a"
]
},
{
"event": "attribute",
"data": [
"href",
"http://test.com/"
]
},
{
"event": "opentag",
"data": [
"a",
{
"href": "http://test.com/"
}
]
},
{
"event": "text",
"data": [
"Foo"
]
},
{
"event": "content",
"data": [
"Foo"
]
},
{
"event": "closetag",
"data": [
"a"
]
},
{
"event": "opentagname",
"data": [
"hr"
]
},
{
"event": "opentag",
"data": [
"hr",
{}
]
},
{
"event": "closetag",
"data": [
"hr"
]
}
]
}
Loading