Skip to content

Commit ba2e512

Browse files
Review urlescape percent-safe set, and use + behavior for form spaces. (#3373)
1 parent d293374 commit ba2e512

File tree

3 files changed

+28
-47
lines changed

3 files changed

+28
-47
lines changed

httpx/_urlparse.py

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
]
9898
)
9999

100+
100101
# {scheme}: (optional)
101102
# //{authority} (optional)
102103
# {path}
@@ -478,7 +479,7 @@ def PERCENT(string: str) -> str:
478479
return "".join([f"%{byte:02X}" for byte in string.encode("utf-8")])
479480

480481

481-
def percent_encoded(string: str, safe: str = "/") -> str:
482+
def percent_encoded(string: str, safe: str) -> str:
482483
"""
483484
Use percent-encoding to quote a string.
484485
"""
@@ -493,7 +494,7 @@ def percent_encoded(string: str, safe: str = "/") -> str:
493494
)
494495

495496

496-
def quote(string: str, safe: str = "/") -> str:
497+
def quote(string: str, safe: str) -> str:
497498
"""
498499
Use percent-encoding to quote a string, omitting existing '%xx' escape sequences.
499500
@@ -524,26 +525,3 @@ def quote(string: str, safe: str = "/") -> str:
524525
parts.append(percent_encoded(trailing_text, safe=safe))
525526

526527
return "".join(parts)
527-
528-
529-
def urlencode(items: list[tuple[str, str]]) -> str:
530-
"""
531-
We can use a much simpler version of the stdlib urlencode here because
532-
we don't need to handle a bunch of different typing cases, such as bytes vs str.
533-
534-
https://github.com/python/cpython/blob/b2f7b2ef0b5421e01efb8c7bee2ef95d3bab77eb/Lib/urllib/parse.py#L926
535-
536-
Note that we use '%20' encoding for spaces, and '%2F' for '/'.
537-
This is slightly different than `requests`, but is the behaviour that browsers use.
538-
539-
See
540-
- https://github.com/encode/httpx/issues/2536
541-
- https://github.com/encode/httpx/issues/2721
542-
- https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
543-
"""
544-
return "&".join(
545-
[
546-
percent_encoded(k, safe="") + "=" + percent_encoded(v, safe="")
547-
for k, v in items
548-
]
549-
)

httpx/_urls.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,31 @@
11
from __future__ import annotations
22

33
import typing
4-
from urllib.parse import parse_qs, unquote
4+
from urllib.parse import parse_qs, unquote, urlencode
55

66
import idna
77

88
from ._types import QueryParamTypes
9-
from ._urlparse import urlencode, urlparse
9+
from ._urlparse import urlparse
1010
from ._utils import primitive_value_to_str
1111

1212
__all__ = ["URL", "QueryParams"]
1313

1414

15+
# To urlencode query parameters, we use the whatwg query percent-encode set
16+
# and additionally escape U+0025 (%), U+0026 (&), U+002B (+) and U+003D (=).
17+
18+
# https://url.spec.whatwg.org/#percent-encoded-bytes
19+
20+
URLENCODE_SAFE = "".join(
21+
[
22+
chr(i)
23+
for i in range(0x20, 0x7F)
24+
if i not in (0x20, 0x22, 0x23, 0x25, 0x26, 0x2B, 0x3C, 0x3D, 0x3E)
25+
]
26+
)
27+
28+
1529
class URL:
1630
"""
1731
url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink")
@@ -605,14 +619,7 @@ def __eq__(self, other: typing.Any) -> bool:
605619
return sorted(self.multi_items()) == sorted(other.multi_items())
606620

607621
def __str__(self) -> str:
608-
"""
609-
Note that we use '%20' encoding for spaces, and treat '/' as a safe
610-
character.
611-
612-
See https://github.com/encode/httpx/issues/2536 and
613-
https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
614-
"""
615-
return urlencode(self.multi_items())
622+
return urlencode(self.multi_items(), safe=URLENCODE_SAFE)
616623

617624
def __repr__(self) -> str:
618625
class_name = self.__class__.__name__

tests/models/test_url.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -141,19 +141,14 @@ def test_path_query_fragment(url, raw_path, path, query, fragment):
141141

142142

143143
def test_url_query_encoding():
144-
"""
145-
URL query parameters should use '%20' for encoding spaces,
146-
and should treat '/' as a safe character. This behaviour differs
147-
across clients, but we're matching browser behaviour here.
148-
149-
See https://github.com/encode/httpx/issues/2536
150-
and https://github.com/encode/httpx/discussions/2460
151-
"""
152144
url = httpx.URL("https://www.example.com/?a=b c&d=e/f")
153145
assert url.raw_path == b"/?a=b%20c&d=e/f"
154146

147+
url = httpx.URL("https://www.example.com/?a=b+c&d=e/f")
148+
assert url.raw_path == b"/?a=b+c&d=e/f"
149+
155150
url = httpx.URL("https://www.example.com/", params={"a": "b c", "d": "e/f"})
156-
assert url.raw_path == b"/?a=b%20c&d=e%2Ff"
151+
assert url.raw_path == b"/?a=b+c&d=e/f"
157152

158153

159154
def test_url_params():
@@ -289,9 +284,10 @@ def test_url_leading_dot_prefix_on_relative_url():
289284

290285

291286
def test_param_with_space():
292-
# Params passed as form key-value pairs should be escaped.
287+
# Params passed as form key-value pairs should be form escaped,
288+
# including the special case of "+" for space separators.
293289
url = httpx.URL("http://webservice", params={"u": "with spaces"})
294-
assert str(url) == "http://webservice?u=with%20spaces"
290+
assert str(url) == "http://webservice?u=with+spaces"
295291

296292

297293
def test_param_requires_encoding():
@@ -313,7 +309,7 @@ def test_param_with_existing_escape_requires_encoding():
313309
# even if they include a valid escape sequence.
314310
# We want to match browser form behaviour here.
315311
url = httpx.URL("http://webservice", params={"u": "http://example.com?q=foo%2Fa"})
316-
assert str(url) == "http://webservice?u=http%3A%2F%2Fexample.com%3Fq%3Dfoo%252Fa"
312+
assert str(url) == "http://webservice?u=http://example.com?q%3Dfoo%252Fa"
317313

318314

319315
# Tests for query parameter percent encoding.

0 commit comments

Comments
 (0)