Skip to content

Commit 380bbd3

Browse files
committed
url: treat special characters in hostnames more strictly in url.parse()
Remove tabs/newlines/carriage returns, similar to WHATWG URL. Throw if ^, |, and some other special characters are in the hostname, similar to WHATWG URL. Use punycode/toAscii when % appears in a hostname, like WHATWG URL.
1 parent 87cdf7d commit 380bbd3

File tree

5 files changed

+51
-40
lines changed

5 files changed

+51
-40
lines changed

lib/internal/idna.js

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,4 @@
11
'use strict';
22

3-
if (internalBinding('config').hasIntl) {
4-
const { toASCII, toUnicode } = internalBinding('icu');
5-
module.exports = { toASCII, toUnicode };
6-
} else {
7-
const { domainToASCII, domainToUnicode } = require('internal/url');
8-
module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode };
9-
}
3+
const { domainToASCII, domainToUnicode } = require('internal/url');
4+
module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode };

lib/url.js

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,6 @@ const slashedProtocol = new SafeSet([
115115
const {
116116
CHAR_SPACE,
117117
CHAR_TAB,
118-
CHAR_CARRIAGE_RETURN,
119-
CHAR_LINE_FEED,
120118
CHAR_NO_BREAK_SPACE,
121119
CHAR_ZERO_WIDTH_NOBREAK_SPACE,
122120
CHAR_HASH,
@@ -314,41 +312,42 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
314312
let hostEnd = -1;
315313
let atSign = -1;
316314
let nonHost = -1;
315+
let invalid = -1;
317316
for (let i = 0; i < rest.length; ++i) {
318317
switch (rest.charCodeAt(i)) {
319-
case CHAR_TAB:
320-
case CHAR_LINE_FEED:
321-
case CHAR_CARRIAGE_RETURN:
322318
case CHAR_SPACE:
323-
case CHAR_DOUBLE_QUOTE:
324-
case CHAR_PERCENT:
325-
case CHAR_SINGLE_QUOTE:
326-
case CHAR_SEMICOLON:
327319
case CHAR_LEFT_ANGLE_BRACKET:
328320
case CHAR_RIGHT_ANGLE_BRACKET:
329-
case CHAR_BACKWARD_SLASH:
330321
case CHAR_CIRCUMFLEX_ACCENT:
322+
case CHAR_VERTICAL_LINE:
323+
case CHAR_TAB:
324+
case CHAR_DOUBLE_QUOTE:
325+
case CHAR_SINGLE_QUOTE:
331326
case CHAR_GRAVE_ACCENT:
332327
case CHAR_LEFT_CURLY_BRACKET:
333-
case CHAR_VERTICAL_LINE:
334328
case CHAR_RIGHT_CURLY_BRACKET:
335-
// Characters that are never ever allowed in a hostname from RFC 2396
336-
if (nonHost === -1)
337-
nonHost = i;
329+
// Characters that are never ever allowed in a hostname from RFC 2396
330+
if (invalid === -1) {
331+
invalid = i;
332+
}
338333
break;
334+
case CHAR_SEMICOLON:
335+
case CHAR_BACKWARD_SLASH:
339336
case CHAR_HASH:
340337
case CHAR_FORWARD_SLASH:
341338
case CHAR_QUESTION_MARK:
342339
// Find the first instance of any host-ending characters
343-
if (nonHost === -1)
340+
if (nonHost === -1) {
344341
nonHost = i;
342+
}
345343
hostEnd = i;
346344
break;
347345
case CHAR_AT:
348346
// At this point, either we have an explicit point where the
349347
// auth portion cannot go past, or the last @ char is the decider.
350348
atSign = i;
351349
nonHost = -1;
350+
invalid = -1;
352351
break;
353352
}
354353
if (hostEnd !== -1)
@@ -360,10 +359,18 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
360359
start = atSign + 1;
361360
}
362361
if (nonHost === -1) {
362+
if (invalid !== -1) {
363+
throw new ERR_INVALID_URL(url);
364+
}
363365
this.host = rest.slice(start);
364366
rest = '';
365367
} else {
368+
if (invalid > -1 && invalid < nonHost) {
369+
throw new ERR_INVALID_URL(url);
370+
}
366371
this.host = rest.slice(start, nonHost);
372+
// WHATWG URL removes tabs, newlines, and carriage returns. Let's do that too.
373+
this.host = this.host.replaceAll(/[\t\n\r]/g, '');
367374
rest = rest.slice(nonHost);
368375
}
369376

@@ -505,13 +512,13 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
505512
function getHostname(self, rest, hostname, url) {
506513
for (let i = 0; i < hostname.length; ++i) {
507514
const code = hostname.charCodeAt(i);
508-
const isValid = (code !== CHAR_FORWARD_SLASH &&
509-
code !== CHAR_BACKWARD_SLASH &&
510-
code !== CHAR_HASH &&
511-
code !== CHAR_QUESTION_MARK &&
512-
code !== CHAR_COLON);
515+
const isHostEnd = (code === CHAR_FORWARD_SLASH ||
516+
code === CHAR_BACKWARD_SLASH ||
517+
code === CHAR_HASH ||
518+
code === CHAR_QUESTION_MARK ||
519+
code === CHAR_COLON);
513520

514-
if (!isValid) {
521+
if (isHostEnd) {
515522
// If leftover starts with :, then it represents an invalid port.
516523
if (hostname.charCodeAt(i) === 58) {
517524
throw new ERR_INVALID_URL(url);

test/parallel/test-url-format.js

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,6 @@ const formatTests = {
6767
hash: '#frag=?bar/#frag',
6868
pathname: '/'
6969
},
70-
'http://google.com" onload="alert(42)/': {
71-
href: 'http://google.com/%22%20onload=%22alert(42)/',
72-
protocol: 'http:',
73-
host: 'google.com',
74-
pathname: '/%22%20onload=%22alert(42)/'
75-
},
7670
'http://a.com/a/b/c?s#h': {
7771
href: 'http://a.com/a/b/c?s#h',
7872
protocol: 'http',

test/parallel/test-url-parse-format.js

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -854,15 +854,15 @@ const parseTests = {
854854
protocol: 'http:',
855855
slashes: true,
856856
auth: 'a\r" \t\n<\'b:b',
857-
host: 'c',
857+
host: 'cd',
858858
port: null,
859-
hostname: 'c',
859+
hostname: 'cd',
860860
hash: null,
861861
search: '?f',
862862
query: 'f',
863-
pathname: '%0D%0Ad/e',
864-
path: '%0D%0Ad/e?f',
865-
href: 'http://a%0D%22%20%09%0A%3C\'b:b@c/%0D%0Ad/e?f'
863+
pathname: '/e',
864+
path: '/e?f',
865+
href: "http://a%0D%22%20%09%0A%3C'b:b@cd/e?f",
866866
},
867867

868868
'https://*': {
@@ -1007,7 +1007,7 @@ for (const u in parseTests) {
10071007
assert.deepStrictEqual(
10081008
actual,
10091009
expected,
1010-
`expected ${inspect(expected)}, got ${inspect(actual)}`
1010+
`parsing ${inspect(u)}, expected ${inspect(expected)}, got ${inspect(actual)}`
10111011
);
10121012
assert.deepStrictEqual(
10131013
spaced,

test/parallel/test-url-parse-invalid-input.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,18 @@ if (common.hasIntl) {
8686
`parsing ${badURL}`);
8787
});
8888
}
89+
90+
{
91+
const badChars = [ ' ', '>', '<', '^', '|' ];
92+
for (const badChar of badChars) {
93+
const badURL = `http://fail${badChar}fail.com/`;
94+
assert.throws(() => { url.parse(badURL); },
95+
(e) => e.code === 'ERR_INVALID_URL',
96+
`parsing "${badURL}"`);
97+
}
98+
}
99+
100+
assert.throws(() => { url.parse('http://google.com" onload="alert(42)/'); },
101+
(e) => e.code === 'ERR_INVALID_URL',
102+
'parsing \'http://google.com" onload="alert(42)/\''
103+
);

0 commit comments

Comments
 (0)