Skip to content

Commit 769ffa2

Browse files
committed
url: treat special characters in hostnames more strictly in url.parse()
Throw if ^, |, and some other special characters are in the hostname, similar to WHATWG URL. Use punycode/toAscii when % appears in a hostname, like WHATWG URL.
1 parent fa7e08c commit 769ffa2

File tree

5 files changed

+45
-33
lines changed

5 files changed

+45
-33
lines changed

lib/internal/idna.js

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,4 @@
11
'use strict';
22

3-
if (internalBinding('config').hasIntl) {
4-
const { toASCII, toUnicode } = internalBinding('icu');
5-
module.exports = { toASCII, toUnicode };
6-
} else {
7-
const { domainToASCII, domainToUnicode } = require('internal/url');
8-
module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode };
9-
}
3+
const { domainToASCII, domainToUnicode } = require('internal/url');
4+
module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode };

lib/url.js

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,10 @@ const slashedProtocol = new SafeSet([
113113
'wss:',
114114
]);
115115
const {
116+
CHAR_LINE_FEED,
117+
CHAR_CARRIAGE_RETURN,
116118
CHAR_SPACE,
117119
CHAR_TAB,
118-
CHAR_CARRIAGE_RETURN,
119-
CHAR_LINE_FEED,
120120
CHAR_NO_BREAK_SPACE,
121121
CHAR_ZERO_WIDTH_NOBREAK_SPACE,
122122
CHAR_HASH,
@@ -130,7 +130,6 @@ const {
130130
CHAR_QUESTION_MARK,
131131
CHAR_DOUBLE_QUOTE,
132132
CHAR_SINGLE_QUOTE,
133-
CHAR_PERCENT,
134133
CHAR_SEMICOLON,
135134
CHAR_BACKWARD_SLASH,
136135
CHAR_CIRCUMFLEX_ACCENT,
@@ -314,6 +313,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
314313
let hostEnd = -1;
315314
let atSign = -1;
316315
let nonHost = -1;
316+
let invalid = -1;
317317
for (let i = 0; i < rest.length; ++i) {
318318
switch (rest.charCodeAt(i)) {
319319
case CHAR_TAB:
@@ -324,35 +324,37 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
324324
i -= 1;
325325
break;
326326
case CHAR_SPACE:
327-
case CHAR_DOUBLE_QUOTE:
328-
case CHAR_PERCENT:
329-
case CHAR_SINGLE_QUOTE:
330-
case CHAR_SEMICOLON:
331327
case CHAR_LEFT_ANGLE_BRACKET:
332328
case CHAR_RIGHT_ANGLE_BRACKET:
333-
case CHAR_BACKWARD_SLASH:
334329
case CHAR_CIRCUMFLEX_ACCENT:
330+
case CHAR_VERTICAL_LINE:
331+
case CHAR_DOUBLE_QUOTE:
332+
case CHAR_SINGLE_QUOTE:
335333
case CHAR_GRAVE_ACCENT:
336334
case CHAR_LEFT_CURLY_BRACKET:
337-
case CHAR_VERTICAL_LINE:
338335
case CHAR_RIGHT_CURLY_BRACKET:
339-
// Characters that are never ever allowed in a hostname from RFC 2396
340-
if (nonHost === -1)
341-
nonHost = i;
336+
// Characters that are never ever allowed in a hostname from RFC 2396
337+
if (invalid === -1) {
338+
invalid = i;
339+
}
342340
break;
341+
case CHAR_SEMICOLON:
342+
case CHAR_BACKWARD_SLASH:
343343
case CHAR_HASH:
344344
case CHAR_FORWARD_SLASH:
345345
case CHAR_QUESTION_MARK:
346346
// Find the first instance of any host-ending characters
347-
if (nonHost === -1)
347+
if (nonHost === -1) {
348348
nonHost = i;
349+
}
349350
hostEnd = i;
350351
break;
351352
case CHAR_AT:
352353
// At this point, either we have an explicit point where the
353354
// auth portion cannot go past, or the last @ char is the decider.
354355
atSign = i;
355356
nonHost = -1;
357+
invalid = -1;
356358
break;
357359
}
358360
if (hostEnd !== -1)
@@ -364,9 +366,15 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
364366
start = atSign + 1;
365367
}
366368
if (nonHost === -1) {
369+
if (invalid !== -1) {
370+
throw new ERR_INVALID_URL(url);
371+
}
367372
this.host = rest.slice(start);
368373
rest = '';
369374
} else {
375+
if (invalid > -1 && invalid < nonHost) {
376+
throw new ERR_INVALID_URL(url);
377+
}
370378
this.host = rest.slice(start, nonHost);
371379
rest = rest.slice(nonHost);
372380
}
@@ -509,13 +517,13 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
509517
function getHostname(self, rest, hostname, url) {
510518
for (let i = 0; i < hostname.length; ++i) {
511519
const code = hostname.charCodeAt(i);
512-
const isValid = (code !== CHAR_FORWARD_SLASH &&
513-
code !== CHAR_BACKWARD_SLASH &&
514-
code !== CHAR_HASH &&
515-
code !== CHAR_QUESTION_MARK &&
516-
code !== CHAR_COLON);
520+
const isHostEnd = (code === CHAR_FORWARD_SLASH ||
521+
code === CHAR_BACKWARD_SLASH ||
522+
code === CHAR_HASH ||
523+
code === CHAR_QUESTION_MARK ||
524+
code === CHAR_COLON);
517525

518-
if (!isValid) {
526+
if (isHostEnd) {
519527
// If leftover starts with :, then it represents an invalid port.
520528
if (hostname.charCodeAt(i) === 58) {
521529
throw new ERR_INVALID_URL(url);

test/parallel/test-url-format.js

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,6 @@ const formatTests = {
6767
hash: '#frag=?bar/#frag',
6868
pathname: '/'
6969
},
70-
'http://google.com" onload="alert(42)/': {
71-
href: 'http://google.com/%22%20onload=%22alert(42)/',
72-
protocol: 'http:',
73-
host: 'google.com',
74-
pathname: '/%22%20onload=%22alert(42)/'
75-
},
7670
'http://a.com/a/b/c?s#h': {
7771
href: 'http://a.com/a/b/c?s#h',
7872
protocol: 'http',

test/parallel/test-url-parse-format.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ for (const u in parseTests) {
10071007
assert.deepStrictEqual(
10081008
actual,
10091009
expected,
1010-
`parsing ${u} and expected ${inspect(expected)} but got ${inspect(actual)}`
1010+
`parsing ${inspect(u)}, expected ${inspect(expected)}, got ${inspect(actual)}`
10111011
);
10121012
assert.deepStrictEqual(
10131013
spaced,

test/parallel/test-url-parse-invalid-input.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,18 @@ if (common.hasIntl) {
8686
`parsing ${badURL}`);
8787
});
8888
}
89+
90+
{
91+
const badChars = [ ' ', '>', '<', '^', '|' ];
92+
for (const badChar of badChars) {
93+
const badURL = `http://fail${badChar}fail.com/`;
94+
assert.throws(() => { url.parse(badURL); },
95+
(e) => e.code === 'ERR_INVALID_URL',
96+
`parsing "${badURL}"`);
97+
}
98+
}
99+
100+
assert.throws(() => { url.parse('http://google.com" onload="alert(42)/'); },
101+
(e) => e.code === 'ERR_INVALID_URL',
102+
'parsing \'http://google.com" onload="alert(42)/\''
103+
);

0 commit comments

Comments
 (0)