1378 lines
45 KiB
Python
1378 lines
45 KiB
Python
|
"""Functions for working with URLs.
|
||
|
|
||
|
Contains implementations of functions from :mod:`urllib.parse` that
|
||
|
handle bytes and strings.
|
||
|
"""
|
||
|
from __future__ import annotations
|
||
|
|
||
|
import codecs
|
||
|
import os
|
||
|
import re
|
||
|
import typing as t
|
||
|
import warnings
|
||
|
from urllib.parse import quote
|
||
|
from urllib.parse import unquote
|
||
|
from urllib.parse import urlencode
|
||
|
from urllib.parse import urlsplit
|
||
|
from urllib.parse import urlunsplit
|
||
|
|
||
|
from ._internal import _check_str_tuple
|
||
|
from ._internal import _decode_idna
|
||
|
from ._internal import _make_encode_wrapper
|
||
|
from ._internal import _to_str
|
||
|
from .datastructures import iter_multi_items
|
||
|
|
||
|
if t.TYPE_CHECKING:
|
||
|
from . import datastructures as ds
|
||
|
|
||
|
# A regular expression for what a valid scheme looks like: letters, digits,
# "+", "-" and "." (RFC 3986, section 3.1). The hyphen is escaped so that it
# is matched literally; written as ``+-.`` it forms a character range that
# also admits ",".
_scheme_re = re.compile(r"^[a-zA-Z0-9+\-.]+$")
|
||
|
|
||
|
# Characters that never need percent-encoding, whichever part of a URL
# they appear in.
_always_safe_chars = "".join(
    (
        "abcdefghijklmnopqrstuvwxyz",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "0123456789",
        "-._~",
        # RFC3986 sub-delims set, not including query string delimiters &=
        "$!'()*+,;",
    )
)
# The same characters as a set of byte values, for fast membership tests.
_always_safe = frozenset(_always_safe_chars.encode("ascii"))

# Percent-decoding table: every two-character hex code (either case, also
# mixed) as bytes, mapped to the byte value it stands for.
_hexdigits = "0123456789ABCDEFabcdef"
_hextobyte = {
    (high + low).encode("ascii"): int(high + low, 16)
    for high in _hexdigits
    for low in _hexdigits
}
# Percent-encoding table: index with a byte value to get its ``%XX`` form.
_bytetohex = [b"%%%02X" % value for value in range(256)]
|
||
|
|
||
|
|
||
|
class _URLTuple(t.NamedTuple):
    """The five components of a split URL, in the order they appear in it."""

    # Text before the first ":", e.g. "http".
    scheme: str
    # Authority part: optional auth, host and optional port.
    netloc: str
    # Path following the authority.
    path: str
    # Query string, without the leading "?".
    query: str
    # Fragment, without the leading "#".
    fragment: str
|
||
|
|
||
|
|
||
|
class BaseURL(_URLTuple):
    """Superclass of :py:class:`URL` and :py:class:`BytesURL`.

    Stores the five URL components and derives the pieces of the netloc
    (auth, host, port) on demand. Subclasses provide the delimiter
    attributes in the string type they operate on.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # Delimiters used when splitting the netloc. Declared here, assigned by
    # the subclasses (``str`` in ``URL``, ``bytes`` in ``BytesURL``).
    _at: str
    _colon: str
    _lbracket: str
    _rbracket: str

    def __new__(cls, *args: t.Any, **kwargs: t.Any) -> BaseURL:
        """Create the tuple, emitting a deprecation warning for the caller."""
        warnings.warn(
            f"'werkzeug.urls.{cls.__name__}' is deprecated and will be removed in"
            " Werkzeug 3.0. Use the 'urllib.parse' library instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return super().__new__(cls, *args, **kwargs)

    def __str__(self) -> str:
        return self.to_url()

    def replace(self, **kwargs: t.Any) -> BaseURL:
        """Return an URL with the same values, except for those parameters
        given new values by whichever keyword arguments are specified."""
        return self._replace(**kwargs)

    @property
    def host(self) -> str | None:
        """The host part of the URL if available, otherwise `None`.  The
        host is either the hostname or the IP address mentioned in the
        URL.  It will not contain the port.
        """
        return self._split_host()[0]

    @property
    def ascii_host(self) -> str | None:
        """Works exactly like :attr:`host` but will return a result that
        is restricted to ASCII.  If the host is a string it is IDNA
        encoded; when that fails the host is returned unchanged.  This is
        useful for socket operations when the URL might include
        internationalized characters.
        """
        rv = self.host
        # Only text hosts can be IDNA encoded; bytes and None pass through.
        if isinstance(rv, str):
            try:
                rv = rv.encode("idna").decode("ascii")
            except UnicodeError:
                # Best effort: keep the original host when it is not
                # valid for IDNA.
                pass
        return rv

    @property
    def port(self) -> int | None:
        """The port in the URL as an integer if it was present, `None`
        otherwise.  This does not fill in default ports.  A port that is
        not a number, or is outside ``0..65535``, is reported as `None`.
        """
        try:
            rv = int(_to_str(self._split_host()[1]))
            if 0 <= rv <= 65535:
                return rv
        except (ValueError, TypeError):
            # int() rejected the value; presumably a missing (None) or
            # non-numeric port. Treat it as absent.
            pass
        return None

    @property
    def auth(self) -> str | None:
        """The authentication part in the URL if available, `None`
        otherwise.
        """
        return self._split_netloc()[0]

    @property
    def username(self) -> str | None:
        """The username if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[0]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_username(self) -> str | None:
        """The username if it was part of the URL, `None` otherwise.
        Unlike :attr:`username` this one is not being decoded.
        """
        return self._split_auth()[0]

    @property
    def password(self) -> str | None:
        """The password if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[1]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_password(self) -> str | None:
        """The password if it was part of the URL, `None` otherwise.
        Unlike :attr:`password` this one is not being decoded.
        """
        return self._split_auth()[1]

    def decode_query(self, *args: t.Any, **kwargs: t.Any) -> ds.MultiDict[str, str]:
        """Decodes the query part of the URL.  This is a shortcut for
        calling :func:`url_decode` on the query argument.  The arguments and
        keyword arguments are forwarded to :func:`url_decode` unchanged.
        """
        return url_decode(self.query, *args, **kwargs)

    def join(self, *args: t.Any, **kwargs: t.Any) -> BaseURL:
        """Joins this URL with another one.  This is just a convenience
        function for calling into :meth:`url_join` and then parsing the
        return value again.
        """
        return url_parse(url_join(self, *args, **kwargs))

    def to_url(self) -> str:
        """Returns a URL string or bytes depending on the type of the
        information stored.  This is just a convenience function
        for calling :meth:`url_unparse` for this URL.
        """
        return url_unparse(self)

    def encode_netloc(self) -> str:
        """Encodes the netloc part to an ASCII safe string."""
        rv = self.ascii_host or ""
        # A colon inside the host means an IPv6 literal, which has to be
        # bracketed so the port separator stays unambiguous.
        if ":" in rv:
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        # Empty parts are filtered out before joining.
        auth = ":".join(
            filter(
                None,
                [
                    url_quote(self.raw_username or "", "utf-8", "strict", "/:%"),
                    url_quote(self.raw_password or "", "utf-8", "strict", "/:%"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def decode_netloc(self) -> str:
        """Decodes the netloc part into a string."""
        host = self.host or ""

        if isinstance(host, bytes):
            host = host.decode()

        rv = _decode_idna(host)

        # Bracket IPv6 literals, see :meth:`encode_netloc`.
        if ":" in rv:
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        # The characters passed as ``unsafe`` stay percent-encoded.
        auth = ":".join(
            filter(
                None,
                [
                    _url_unquote_legacy(self.raw_username or "", "/:%@"),
                    _url_unquote_legacy(self.raw_password or "", "/:%@"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def to_uri_tuple(self) -> BaseURL:
        """Returns a URL tuple that holds a URI.  This passes the URL
        through :func:`iri_to_uri` and parses the result again.

        It's usually more interesting to directly call :meth:`iri_to_uri` which
        will return a string.
        """
        return url_parse(iri_to_uri(self))

    def to_iri_tuple(self) -> BaseURL:
        """Returns a :class:`URL` tuple that holds a IRI.  This will try
        to decode as much information as possible in the URL without
        losing information similar to how a web browser does it for the
        URL bar.

        It's usually more interesting to directly call :meth:`uri_to_iri` which
        will return a string.
        """
        return url_parse(uri_to_iri(self))

    def get_file_location(
        self, pathformat: str | None = None
    ) -> tuple[str | None, str | None]:
        """Returns a tuple with the location of the file in the form
        ``(server, location)``.  If the netloc is empty in the URL or
        points to localhost, it's represented as ``None``.

        The `pathformat` by default is autodetection but needs to be set
        when working with URLs of a specific system.  The supported values
        are ``'windows'`` when working with Windows or DOS paths and
        ``'posix'`` when working with posix paths.

        If the URL does not point to a local file, the server and location
        are both represented as ``None``.

        :param pathformat: The expected format of the path component.
                           Currently ``'windows'`` and ``'posix'`` are
                           supported.  Defaults to ``None`` which is
                           autodetect.
        :raises TypeError: if `pathformat` is not a supported value.
        """
        if self.scheme != "file":
            return None, None

        path = url_unquote(self.path)
        host = self.netloc or None

        if pathformat is None:
            pathformat = "windows" if os.name == "nt" else "posix"

        if pathformat == "windows":
            # Turn "/c:/dir" and "/c|/dir" into "c:/dir". The separator is
            # compared against a tuple because ``path[2:3]`` is empty for a
            # two-character path such as "/c", and the empty string is
            # contained in every string.
            if path[:1] == "/" and path[1:2].isalpha() and path[2:3] in ("|", ":"):
                path = f"{path[1:2]}:{path[3:]}"
            windows_share = path[:3] in ("\\" * 3, "/" * 3)
            import ntpath

            path = ntpath.normpath(path)
            # Windows shared drives are represented as ``\\host\\directory``.
            # That results in a URL like ``file://///host/directory``, and a
            # path like ``///host/directory``. We need to special-case this
            # because the path contains the hostname.
            if windows_share and host is None:
                parts = path.lstrip("\\").split("\\", 1)
                if len(parts) == 2:
                    host, path = parts
                else:
                    host = parts[0]
                    path = ""
        elif pathformat == "posix":
            import posixpath

            path = posixpath.normpath(path)
        else:
            raise TypeError(f"Invalid path format {pathformat!r}")

        if host in ("127.0.0.1", "::1", "localhost"):
            host = None

        return host, path

    def _split_netloc(self) -> tuple[str | None, str]:
        """Split the netloc at the first ``@`` into ``(auth, rest)``;
        ``auth`` is ``None`` when there is no ``@``.
        """
        if self._at in self.netloc:
            auth, _, netloc = self.netloc.partition(self._at)
            return auth, netloc
        return None, self.netloc

    def _split_auth(self) -> tuple[str | None, str | None]:
        """Split the auth part at the first ``:`` into
        ``(username, password)``.  Missing parts are ``None``.
        """
        auth = self._split_netloc()[0]
        if not auth:
            return None, None
        if self._colon not in auth:
            return auth, None

        username, _, password = auth.partition(self._colon)
        return username, password

    def _split_host(self) -> tuple[str | None, str | None]:
        """Split the netloc without auth into ``(host, port)``.  The
        port is returned unparsed, and is ``None`` when absent.
        """
        rv = self._split_netloc()[1]
        if not rv:
            return None, None

        # Plain host: the first colon, if any, starts the port.
        if not rv.startswith(self._lbracket):
            if self._colon in rv:
                host, _, port = rv.partition(self._colon)
                return host, port
            return rv, None

        # Bracketed host: colons inside the brackets belong to the host.
        idx = rv.find(self._rbracket)
        if idx < 0:
            # Unterminated bracket; hand the text back unchanged.
            return rv, None

        host = rv[1:idx]
        rest = rv[idx + 1 :]
        if rest.startswith(self._colon):
            return host, rest[1:]
        return host, None
|
||
|
|
||
|
|
||
|
class URL(BaseURL):
    """Represents a parsed URL.  This behaves like a regular tuple but
    also has some extra attributes that give further insight into the
    URL.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # Text delimiters used by the netloc splitting helpers of the base class.
    _at = "@"
    _colon = ":"
    _lbracket = "["
    _rbracket = "]"

    def encode(self, charset: str = "utf-8", errors: str = "replace") -> BytesURL:
        """Encodes the URL to a tuple made out of bytes.  The charset is
        only being used for the path, query and fragment.  The scheme and
        the netloc are encoded as ASCII.

        :param charset: the charset for the path, query and fragment.
        :param errors: the error handling for the encoding.
        """
        return BytesURL(
            self.scheme.encode("ascii"),
            # encode_netloc() returns text; every field of a BytesURL has to
            # be bytes, otherwise its netloc helpers fail with a TypeError.
            self.encode_netloc().encode("ascii", errors),
            self.path.encode(charset, errors),
            self.query.encode(charset, errors),
            self.fragment.encode(charset, errors),
        )
|
||
|
|
||
|
|
||
|
class BytesURL(BaseURL):
    """A parsed URL whose components are bytes.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # Bytes delimiters used by the netloc splitting helpers of the base class.
    _at = b"@"  # type: ignore
    _colon = b":"  # type: ignore
    _lbracket = b"["  # type: ignore
    _rbracket = b"]"  # type: ignore

    def __str__(self) -> str:
        raw_url = self.to_url()
        return raw_url.decode("utf-8", "replace")  # type: ignore

    def encode_netloc(self) -> bytes:  # type: ignore
        """Return the netloc as stored; it is not re-encoded."""
        return self.netloc  # type: ignore

    def decode(self, charset: str = "utf-8", errors: str = "replace") -> URL:
        """Build a :class:`URL` from this tuple. The scheme is decoded as
        ASCII and the netloc through :meth:`decode_netloc`; ``charset``
        and ``errors`` apply to the path, query and fragment only.
        """
        text_scheme = self.scheme.decode("ascii")  # type: ignore
        text_netloc = self.decode_netloc()
        # Items 2, 3 and 4 of the tuple are path, query and fragment.
        text_path, text_query, text_fragment = (
            part.decode(charset, errors) for part in self[2:]  # type: ignore
        )
        return URL(text_scheme, text_netloc, text_path, text_query, text_fragment)
|
||
|
|
||
|
|
||
|
# Percent-decoding tables, keyed by the set of byte values that have to stay
# quoted. The empty set maps to the complete table.
_unquote_maps: dict[frozenset[int], dict[bytes, int]] = {}
_unquote_maps[frozenset()] = _hextobyte
|
||
|
|
||
|
|
||
|
def _unquote_to_bytes(string: str | bytes, unsafe: str | bytes = "") -> bytes:
    """Percent-decode ``string`` and return the result as bytes.

    :param string: the value to decode; text is encoded as UTF-8 first.
    :param unsafe: characters whose percent-encoded form is left as is.
    """
    raw = string.encode("utf-8") if isinstance(string, str) else string
    blocked = unsafe.encode("utf-8") if isinstance(unsafe, str) else unsafe
    keep_quoted = frozenset(bytearray(blocked))

    # Everything before the first "%" is literal; each later chunk starts
    # with what should be a two-character hex code.
    literal, *chunks = raw.split(b"%")
    decoded = bytearray(literal)

    # Decoding tables are built once per distinct set of unsafe bytes.
    table = _unquote_maps.get(keep_quoted)

    if table is None:
        table = _unquote_maps[keep_quoted] = {
            code: value for code, value in _hextobyte.items() if value not in keep_quoted
        }

    for chunk in chunks:
        value = table.get(chunk[:2])

        # Zero is a valid byte value, so test against None explicitly.
        if value is None:
            # Not a code that may be decoded: restore the "%" that split()
            # removed and keep the chunk untouched.
            decoded += b"%"
            decoded += chunk
        else:
            decoded.append(value)
            decoded += chunk[2:]

    return bytes(decoded)
|
||
|
|
||
|
|
||
|
def _url_encode_impl(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    charset: str,
    sort: bool,
    key: t.Callable[[tuple[str, str]], t.Any] | None,
) -> t.Iterator[str]:
    """Yield one quoted ``key=value`` string for every item of ``obj``.

    :param obj: the items to encode, read through ``iter_multi_items``.
    :param charset: the charset used to encode keys and values that are
        not bytes already.
    :param sort: whether to sort the items before encoding them.
    :param key: the sort key passed to :func:`sorted` when ``sort`` is set.
    """
    # iter_multi_items is imported at module level; it does not need to be
    # imported again on every call.
    iterable: t.Iterable[tuple[str, str]] = iter_multi_items(obj)

    if sort:
        iterable = sorted(iterable, key=key)

    for key_str, value_str in iterable:
        # Items without a value are left out entirely.
        if value_str is None:
            continue

        # Bytes are used as they are, anything else is converted to text
        # and encoded.
        if isinstance(key_str, bytes):
            key_bytes = key_str
        else:
            key_bytes = str(key_str).encode(charset)

        if isinstance(value_str, bytes):
            value_bytes = value_str
        else:
            value_bytes = str(value_str).encode(charset)

        yield f"{_fast_url_quote_plus(key_bytes)}={_fast_url_quote_plus(value_bytes)}"
|
||
|
|
||
|
|
||
|
def _url_unquote_legacy(value: str, unsafe: str = "") -> str:
    """Unquote ``value`` as strict UTF-8, decoding it as Latin-1 instead
    when the unquoted bytes are not valid UTF-8.

    :param value: the string to unquote.
    :param unsafe: characters whose percent-encoded form is left as is.
    """
    try:
        decoded = url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe)
    except UnicodeError:
        decoded = url_unquote(value, charset="latin1", unsafe=unsafe)

    return decoded
|
||
|
|
||
|
|
||
|
def url_parse(
    url: str, scheme: str | None = None, allow_fragments: bool = True
) -> BaseURL:
    """Split a URL into a :class:`URL` tuple, or a :class:`BytesURL`
    tuple when ``url`` is not a string.

    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the scheme to use if the URL does not carry one;
        ignored otherwise.
    :param allow_fragments: if set to `False` the fragment is not split
        off and stays part of the preceding component.
    :raises ValueError: if the netloc has an unbalanced ``[`` or ``]``.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.urlsplit`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_parse' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.urlsplit' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    # ``s`` converts a text literal to the string type of ``url``.
    s = _make_encode_wrapper(url)
    result_type = URL if isinstance(url, str) else BytesURL

    if scheme is None:
        scheme = s("")

    netloc = query = fragment = s("")
    colon_at = url.find(s(":"))

    if colon_at > 0 and _scheme_re.match(_to_str(url[:colon_at], errors="replace")):
        remainder = url[colon_at + 1 :]
        digits = s("0123456789")

        # A remainder made of digits only is a port number, so the text in
        # front of the colon is part of the path and not a scheme. all() is
        # true for an empty remainder, hence the separate emptiness test.
        if not remainder or not all(char in digits for char in remainder):
            scheme, url = url[:colon_at].lower(), remainder

    if url[:2] == s("//"):
        # The netloc runs up to the first "/", "?" or "#" after the two
        # slashes, or to the end of the URL when none of them occurs.
        found = [
            position
            for position in (url.find(delimiter, 2) for delimiter in s("/?#"))
            if position >= 0
        ]
        netloc_end = min(found, default=len(url))
        netloc, url = url[2:netloc_end], url[netloc_end:]

        # Exactly one of the two brackets being present is an error.
        if (s("[") in netloc) != (s("]") in netloc):
            raise ValueError("Invalid IPv6 URL")

    if allow_fragments and s("#") in url:
        url, _, fragment = url.partition(s("#"))

    if s("?") in url:
        url, _, query = url.partition(s("?"))

    return result_type(scheme, netloc, url, query, fragment)
|
||
|
|
||
|
|
||
|
def _make_fast_url_quote(
    charset: str = "utf-8",
    errors: str = "strict",
    safe: str | bytes = "/:",
    unsafe: str | bytes = "",
) -> t.Callable[[bytes], str]:
    """Build a quoting function around a precomputed translation table.

    The returned function takes the bytes to quote as its only argument,
    in contrast to :func:`url_quote`.

    :param charset: The charset used to encode ``safe`` and ``unsafe``
        when they are given as text.
    :param errors: How to handle errors during that encoding.
    :param safe: An optional sequence of safe characters to never encode.
    :param unsafe: An optional sequence of unsafe characters to always encode.
    """
    safe_bytes = safe.encode(charset, errors) if isinstance(safe, str) else safe
    unsafe_bytes = (
        unsafe.encode(charset, errors) if isinstance(unsafe, str) else unsafe
    )

    # ``unsafe`` is subtracted last, so it wins over both other sets.
    unquoted = frozenset(bytearray(safe_bytes)) | _always_safe
    unquoted -= frozenset(bytearray(unsafe_bytes))

    # One entry per byte value: the character itself, or its %XX escape.
    table = [
        chr(value) if value in unquoted else f"%{value:02X}" for value in range(256)
    ]
    lookup = table.__getitem__

    def quote(string: bytes) -> str:
        return "".join(map(lookup, string))

    return quote
|
||
|
|
||
|
|
||
|
# Quoting functions built once at import time: one with the default
# settings, and one that keeps spaces unquoted and always quotes "+".
_fast_url_quote = _make_fast_url_quote()
_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+")


def _fast_url_quote_plus(string: bytes) -> str:
    """Quote ``string`` with ``_fast_quote_plus`` and write spaces as "+"."""
    quoted = _fast_quote_plus(string)
    # Spaces were left unquoted above; swap each of them for a "+".
    return "+".join(quoted.split(" "))
|
||
|
|
||
|
|
||
|
def url_quote(
    string: str | bytes,
    charset: str = "utf-8",
    errors: str = "strict",
    safe: str | bytes = "/:",
    unsafe: str | bytes = "",
) -> str:
    """Percent-encode a single string.

    :param string: the string to quote. Values that are neither text nor
        bytes are converted with :class:`str` first.
    :param charset: the charset used to encode text arguments and to
        decode the result.
    :param errors: the error handling for the encoding.
    :param safe: an optional sequence of characters to leave unquoted.
    :param unsafe: an optional sequence of characters to quote even if
        they would otherwise be safe.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.quote`` instead.

    .. versionadded:: 0.9.2
       The `unsafe` parameter was added.
    """
    warnings.warn(
        "'werkzeug.urls.url_quote' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.quote' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if not isinstance(string, (str, bytes, bytearray)):
        string = str(string)

    # Bring all three arguments to bytes before working on byte values.
    data = string.encode(charset, errors) if isinstance(string, str) else string
    safe_bytes = safe.encode(charset, errors) if isinstance(safe, str) else safe
    unsafe_bytes = (
        unsafe.encode(charset, errors) if isinstance(unsafe, str) else unsafe
    )

    # ``unsafe`` is subtracted last, so it wins over both other sets.
    unquoted = frozenset(bytearray(safe_bytes)) | _always_safe
    unquoted -= frozenset(bytearray(unsafe_bytes))

    quoted = b"".join(
        bytes((value,)) if value in unquoted else _bytetohex[value]
        for value in bytearray(data)
    )
    return quoted.decode(charset)
|
||
|
|
||
|
|
||
|
def url_quote_plus(
    string: str, charset: str = "utf-8", errors: str = "strict", safe: str = ""
) -> str:
    """URL encode a single string with the given encoding and convert
    whitespace to "+".

    :param string: The string to quote.
    :param charset: The charset to be used.
    :param errors: The error handling for the encoding.
    :param safe: An optional sequence of safe characters.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.quote_plus`` instead.
    """
    # The removal version matches the docstring and the other deprecation
    # warnings in this module.
    warnings.warn(
        "'werkzeug.urls.url_quote_plus' is deprecated and will be removed in Werkzeug"
        " 3.0. Use 'urllib.parse.quote_plus' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Spaces are kept unquoted and literal "+" characters are always quoted,
    # so that the final replacement only turns real spaces into "+".
    return url_quote(string, charset, errors, safe + " ", "+").replace(" ", "+")
|
||
|
|
||
|
|
||
|
def url_unparse(components: tuple[str, str, str, str, str]) -> str:
    """Assemble a URL from its five components; the reverse operation to
    :meth:`url_parse`. Plain tuples are accepted as well as :class:`URL`
    tuples.

    :param components: the parsed URL as a
        ``(scheme, netloc, path, query, fragment)`` tuple.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.urlunsplit`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_unparse' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.urlunsplit' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    _check_str_tuple(components)
    scheme, netloc, path, query, fragment = components
    # ``s`` converts a text literal to the string type of the components.
    s = _make_encode_wrapper(scheme)

    # We generally treat file:///x and file:/x the same which is also
    # what browsers seem to do.  This also allows us to ignore a schema
    # register for netloc utilization or having to differentiate between
    # empty and missing netloc.
    if netloc or (scheme and path.startswith(s("/"))):
        # A non-empty path after a netloc has to be rooted.
        if path and path[:1] != s("/"):
            path = s("/") + path

        url = s("//") + (netloc or s("")) + path
    elif path:
        url = s("") + path
    else:
        url = s("")

    if scheme:
        url = scheme + s(":") + url

    if query:
        url = url + s("?") + query

    if fragment:
        url = url + s("#") + fragment

    return url
|
||
|
|
||
|
|
||
|
def url_unquote(
    s: str | bytes,
    charset: str = "utf-8",
    errors: str = "replace",
    unsafe: str = "",
) -> str:
    """Percent-decode a single string. If the charset is set to `None`
    the unquoted bytes are returned without being decoded.

    :param s: the string to unquote.
    :param charset: the charset used to decode the unquoted bytes. If set
        to `None` no decoding will take place.
    :param errors: the error handling for the charset decoding.
    :param unsafe: characters whose percent-encoded form is left as is.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.unquote`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_unquote' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.unquote' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    raw = _unquote_to_bytes(s, unsafe)
    return raw if charset is None else raw.decode(charset, errors)
|
||
|
|
||
|
|
||
|
def url_unquote_plus(
    s: str | bytes, charset: str = "utf-8", errors: str = "replace"
) -> str:
    """URL decode a single string with the given `charset` and decode "+" to
    whitespace.

    Per default bytes that cannot be decoded are replaced.  If you want a
    different behavior you can set `errors` to ``'ignore'`` or ``'strict'``.

    :param s: The string to unquote.
    :param charset: the charset of the query string.  If set to `None`
                    no decoding will take place.
    :param errors: The error handling for the `charset` decoding.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.unquote_plus`` instead.
    """
    # The removal version matches the docstring and the other deprecation
    # warnings in this module.
    warnings.warn(
        "'werkzeug.urls.url_unquote_plus' is deprecated and will be removed in Werkzeug"
        " 3.0. Use 'urllib.parse.unquote_plus' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Replace "+" before unquoting, so that a quoted plus (%2B) survives as
    # a literal "+". The literals have to match the type of ``s``.
    if isinstance(s, str):
        s = s.replace("+", " ")
    else:
        s = s.replace(b"+", b" ")

    return url_unquote(s, charset, errors)
|
||
|
|
||
|
|
||
|
def url_fix(s: str, charset: str = "utf-8") -> str:
    r"""Repair a URL that contains characters which are not allowed in
    URLs, such as ' ', in a way similar to how browsers treat data
    entered by the user:

    >>> url_fix('http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s: the string with the URL to fix.
    :param charset: The target charset for the URL if the url was given
        as a string.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0.
    """
    warnings.warn(
        "'werkzeug.urls.url_fix' is deprecated and will be removed in Werkzeug 3.0.",
        DeprecationWarning,
        stacklevel=2,
    )
    # Work on text, with backslashes (which are invalid in URLs anyways)
    # turned into slashes. This is consistent with what Chrome does.
    text = _to_str(s, charset, "replace")
    text = text.replace("\\", "/")

    # A malformed Windows file URL such as "file://c:/dir" is missing the
    # slash in front of the drive letter; add it.
    has_drive = text[7:8].isalpha() and text[8:10] in (":/", "|/")

    if text.startswith("file://") and has_drive:
        text = f"file:///{text[7:]}"

    parsed = url_parse(text)
    # Query and fragment share the same set of characters to leave alone.
    query_safe = ":&%=+$!*'(),"
    fixed_path = url_quote(parsed.path, charset, safe="/%+$!*'(),")
    fixed_query = url_quote_plus(parsed.query, charset, safe=query_safe)
    fixed_fragment = url_quote_plus(parsed.fragment, charset, safe=query_safe)
    return url_unparse(
        (
            parsed.scheme,
            parsed.encode_netloc(),
            fixed_path,
            fixed_query,
            fixed_fragment,
        )
    )
|
||
|
|
||
|
|
||
|
def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
    """Codec error handler that percent-encodes the bytes which failed to
    decode. Used in :func:`uri_to_iri` after unquoting to re-quote any
    invalid bytes.

    Returns the replacement text and the position to resume decoding at.
    """
    # the docs state that UnicodeError does have these attributes,
    # but mypy isn't picking them up
    resume_at = e.end  # type: ignore
    invalid = e.object[e.start : resume_at]  # type: ignore
    return quote(invalid, safe=""), resume_at


# Make the handler available under the name "werkzeug.url_quote".
codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)
|
||
|
|
||
|
|
||
|
def _make_unquote_part(name: str, chars: str) -> t.Callable[[str, str, str], str]:
    """Build a function that unquotes every percent-encoded character of
    a string except the ones in ``chars``. That makes it possible to work
    with unquoted characters without changing the meaning of a URL part.

    :param name: the name of the URL part, used to name the function.
    :param chars: the characters that have to stay percent-encoded.
    """
    protected = "|".join(f"{ord(char):02X}" for char in sorted(chars))
    # Matches runs of escapes that must be preserved. The capture group
    # makes split() return those runs along with the text between them.
    pattern = re.compile(f"((?:%(?:{protected}))+)", re.I)

    def _unquote_partial(value: str, encoding: str, errors: str) -> str:
        pieces = pattern.split(value)
        # Even positions hold the text between preserved runs, odd
        # positions the runs themselves. Only the former is unquoted.
        pieces[::2] = [unquote(piece, encoding, errors) for piece in pieces[::2]]
        return "".join(pieces)

    _unquote_partial.__name__ = f"_unquote_{name}"
    return _unquote_partial
|
||
|
|
||
|
|
||
|
# Characters that have to stay percent-encoded in the individual URL parts,
# based on https://url.spec.whatwg.org/#percent-encoded-bytes
# The controls, the space, "%" and DEL stay quoted in every part.
_always_unsafe = "".join(map(chr, (*range(0x21), 0x25, 0x7F)))
# Every part additionally protects its own delimiters.
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", _always_unsafe + "&=+#")
_unquote_path = _make_unquote_part("path", _always_unsafe + "/?#")
_unquote_user = _make_unquote_part("user", _always_unsafe + ":@/?#")
|
||
|
|
||
|
|
||
|
def uri_to_iri(
    uri: str | tuple[str, str, str, str, str],
    charset: str | None = None,
    errors: str | None = None,
) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :param charset: The encoding to decode unquoted bytes with.
    :param errors: Error handler to use during ``bytes.decode``. By
        default, invalid bytes are left quoted.

    .. versionchanged:: 2.3
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters, are
        deprecated and will be removed in Werkzeug 3.0.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    if isinstance(uri, tuple):
        warnings.warn(
            "Passing a tuple is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        uri = urlunsplit(uri)

    if isinstance(uri, bytes):
        warnings.warn(
            "Passing bytes is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        uri = uri.decode()

    if charset is not None:
        warnings.warn(
            "The 'charset' parameter is deprecated and will be removed"
            " in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        charset = "utf-8"

    if errors is not None:
        warnings.warn(
            "The 'errors' parameter is deprecated and will be removed in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        # Handler registered by this module that re-quotes invalid bytes.
        errors = "werkzeug.url_quote"

    parts = urlsplit(uri)
    # Each part keeps its own set of delimiters percent-encoded.
    path = _unquote_path(parts.path, charset, errors)
    query = _unquote_query(parts.query, charset, errors)
    fragment = _unquote_fragment(parts.fragment, charset, errors)

    if parts.hostname:
        netloc = _decode_idna(parts.hostname)
    else:
        netloc = ""

    # A colon in the host means an IPv6 literal; urlsplit strips the
    # brackets from ``hostname``, so they are added back here.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # Compare against None: an explicit port 0 is falsy but still has to
    # be kept.
    if parts.port is not None:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = _unquote_user(parts.username, charset, errors)

        if parts.password:
            auth = f"{auth}:{_unquote_user(parts.password, charset, errors)}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
|
||
|
|
||
|
|
||
|
def iri_to_uri(
    iri: str | tuple[str, str, str, str, str],
    charset: str | None = None,
    errors: str | None = None,
    safe_conversion: bool | None = None,
) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    :param iri: The IRI to convert.
    :param charset: The encoding of the IRI.
    :param errors: Error handler to use during ``bytes.encode``.
    :param safe_conversion: If true, an IRI that is pure ASCII and consists of a
        single whitespace-separated token is returned unchanged.

    .. versionchanged:: 2.3
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters, are
        deprecated and will be removed in Werkzeug 3.0.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 2.3
        The ``safe_conversion`` parameter is deprecated and will be removed in Werkzeug
        3.0.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
        The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    if charset is not None:
        warnings.warn(
            "The 'charset' parameter is deprecated and will be removed"
            " in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        charset = "utf-8"

    if isinstance(iri, tuple):
        warnings.warn(
            "Passing a tuple is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        iri = urlunsplit(iri)

    if isinstance(iri, bytes):
        warnings.warn(
            "Passing bytes is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        iri = iri.decode(charset)

    if errors is not None:
        warnings.warn(
            "The 'errors' parameter is deprecated and will be removed in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        errors = "strict"

    if safe_conversion is not None:
        warnings.warn(
            "The 'safe_conversion' parameter is deprecated and will be removed in"
            " Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )

    if safe_conversion:
        # If we're not sure if it's safe to normalize the URL, and it only contains
        # ASCII characters, return it as-is.
        try:
            ascii_iri = iri.encode("ascii")

            # Only return if it is a single whitespace-separated token.
            if len(ascii_iri.split()) == 1:
                return iri
        except UnicodeError:
            pass

    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@", encoding=charset, errors=errors)
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@", encoding=charset, errors=errors)
    fragment = quote(
        parts.fragment, safe="%!#$&'()*+,/:;=?@", encoding=charset, errors=errors
    )

    if parts.hostname:
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    # A colon in the host means an IPv6 literal, which must be bracketed again.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # Compare against None rather than truthiness so that an explicit port 0 is
    # kept instead of being silently dropped from the result.
    port = parts.port

    if port is not None:
        netloc = f"{netloc}:{port}"

    if parts.username:
        # Use the same encoding and error handler as the other URL parts.
        auth = quote(
            parts.username, safe="%!$&'()*+,;=", encoding=charset, errors=errors
        )

        if parts.password:
            pass_quoted = quote(
                parts.password, safe="%!$&'()*+,;=", encoding=charset, errors=errors
            )
            auth = f"{auth}:{pass_quoted}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
|
||
|
|
||
|
|
||
|
def _invalid_iri_to_uri(iri: str) -> str:
    """Pass ASCII-only IRIs through untouched, otherwise defer to
    :func:`iri_to_uri`.

    Some schemes, such as ``itms-services://``, require the ``//`` even though
    they have no host component, and ``iri_to_uri`` would strip it. Responses
    currently always run the redirect ``Location`` header through
    ``iri_to_uri``, so an IRI that is pure ASCII and splits into exactly one
    whitespace-separated token is returned as-is to avoid that. In Werkzeug
    3.0, this should become a ``response.process_location`` flag.

    :meta private:
    """
    # Pure ASCII and a single token: nothing needs quoting, keep it verbatim.
    if iri.isascii() and len(iri.split(None, 1)) == 1:
        return iri

    return iri_to_uri(iri)
|
||
|
|
||
|
|
||
|
def url_decode(
    s: t.AnyStr,
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: str = "&",
    cls: type[ds.MultiDict] | None = None,
) -> ds.MultiDict[str, str]:
    """Split a query string into key/value pairs and collect them in a
    :class:`MultiDict`.

    :param s: The query string to parse, as ``str`` or ``bytes``.
    :param charset: Charset passed on for decoding keys and values. Also used
        (falling back to ``"ascii"``) to convert ``separator`` to the same
        type as ``s``.
    :param include_empty: If false, items that have no ``=`` are skipped.
    :param errors: Error handling behavior passed on for decoding.
    :param separator: Separator between pairs.
    :param cls: Container class instantiated with the decoded pairs instead
        of :class:`MultiDict`.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.parse_qs`` instead.

    .. versionchanged:: 2.1
        The ``decode_keys`` parameter was removed.

    .. versionchanged:: 0.5
        In previous versions ";" and "&" could be used for url decoding.
        Now only "&" is supported. If you want to use ";", a different
        ``separator`` can be provided.

    .. versionchanged:: 0.5
        The ``cls`` parameter was added.
    """
    warnings.warn(
        "'werkzeug.urls.url_decode' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.parse_qs' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict

    # The separator must have the same type as the query string to split on it.
    if isinstance(s, str):
        if not isinstance(separator, str):
            separator = separator.decode(charset or "ascii")
    elif isinstance(s, bytes) and not isinstance(separator, bytes):
        separator = separator.encode(charset or "ascii")  # type: ignore

    raw_pairs = s.split(separator)  # type: ignore
    return cls(_url_decode_impl(raw_pairs, charset, include_empty, errors))
|
||
|
|
||
|
|
||
|
def url_decode_stream(
    stream: t.IO[bytes],
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: bytes = b"&",
    cls: type[ds.MultiDict] | None = None,
    limit: int | None = None,
) -> ds.MultiDict[str, str]:
    """Stream-based counterpart of :func:`url_decode`. The stream is cut
    into chunks at ``separator`` using
    :func:`~werkzeug.wsgi.make_chunk_iter`, and the resulting generator of
    decoded pairs is handed straight to ``cls``, so the data can be
    consumed while it is being parsed.

    :param stream: A stream with the encoded query string.
    :param charset: The charset of the query string.
    :param include_empty: Set to ``False`` if items without a value should
        not appear in the result.
    :param errors: The decoding error behavior.
    :param separator: The pair separator to be used, defaults to ``&``.
    :param cls: An optional dict class to use. If this is not specified
        or ``None``, :class:`MultiDict` is used.
    :param limit: The content length of the URL data, passed on to
        ``make_chunk_iter``.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.parse_qs`` instead.

    .. versionchanged:: 2.1
        The ``decode_keys`` and ``return_iterator`` parameters were removed.

    .. versionadded:: 0.8
    """
    warnings.warn(
        "'werkzeug.urls.url_decode_stream' is deprecated and will be removed in"
        " Werkzeug 2.4. Use 'urllib.parse.parse_qs' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    from .wsgi import make_chunk_iter

    chunks = make_chunk_iter(stream, separator, limit)
    decoded_pairs = _url_decode_impl(chunks, charset, include_empty, errors)

    if cls is not None:
        return cls(decoded_pairs)

    from .datastructures import MultiDict  # noqa: F811

    return MultiDict(decoded_pairs)
|
||
|
|
||
|
|
||
|
def _url_decode_impl(
    pair_iter: t.Iterable[t.AnyStr], charset: str, include_empty: bool, errors: str
) -> t.Iterator[tuple[str, str]]:
    """Yield an unquoted ``(key, value)`` tuple for every non-empty item of
    ``pair_iter``.

    Each item is split at its first ``=``. An item without ``=`` is treated
    as a key with an empty value, or skipped entirely when ``include_empty``
    is false. Key and value are both passed through ``url_unquote_plus``
    with the given ``charset`` and ``errors``.
    """
    for item in pair_iter:
        if not item:
            continue

        # Build literals of the same type (str or bytes) as the item itself.
        wrap = _make_encode_wrapper(item)
        eq_sign = wrap("=")

        if eq_sign not in item:
            if not include_empty:
                continue

            raw_key, raw_value = item, wrap("")
        else:
            raw_key, raw_value = item.split(eq_sign, 1)

        yield (
            url_unquote_plus(raw_key, charset, errors),
            url_unquote_plus(raw_value, charset, errors),
        )
|
||
|
|
||
|
|
||
|
def url_encode(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Callable[[tuple[str, str]], t.Any] | None = None,
    separator: str = "&",
) -> str:
    """Build a query string from a dict/`MultiDict` by joining the encoded
    pairs with ``separator``. If a value is `None` it will not appear in the
    result string.

    :param obj: The object to encode into a query string.
    :param charset: The charset of the query string.
    :param sort: Set to `True` if you want parameters to be sorted by `key`.
    :param separator: The separator to be used for the pairs.
    :param key: An optional function to be used for sorting. For more details
        check out the :func:`sorted` documentation.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urlencode`` instead.

    .. versionchanged:: 2.1
        The ``encode_keys`` parameter was removed.

    .. versionchanged:: 0.5
        Added the ``sort``, ``key``, and ``separator`` parameters.
    """
    warnings.warn(
        "'werkzeug.urls.url_encode' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.urlencode' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    joiner = _to_str(separator, "ascii")
    encoded_pairs = _url_encode_impl(obj, charset, sort, key)
    return joiner.join(encoded_pairs)
|
||
|
|
||
|
|
||
|
def url_encode_stream(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    stream: t.IO[str] | None = None,
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Callable[[tuple[str, str]], t.Any] | None = None,
    separator: str = "&",
) -> None:
    """Variant of :meth:`url_encode` that writes the encoded pairs to a
    stream, separated by ``separator``. If the stream is `None`, the
    generator over the encoded pairs is returned instead and nothing is
    written.

    :param obj: The object to encode into a query string.
    :param stream: A stream to write the encoded object into or `None` if
        an iterator over the encoded pairs should be returned. In that case
        the separator argument is ignored.
    :param charset: The charset of the query string.
    :param sort: Set to `True` if you want parameters to be sorted by `key`.
    :param separator: The separator to be used for the pairs.
    :param key: An optional function to be used for sorting. For more details
        check out the :func:`sorted` documentation.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urlencode`` instead.

    .. versionchanged:: 2.1
        The ``encode_keys`` parameter was removed.

    .. versionadded:: 0.8
    """
    warnings.warn(
        "'werkzeug.urls.url_encode_stream' is deprecated and will be removed in"
        " Werkzeug 2.4. Use 'urllib.parse.urlencode' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    separator = _to_str(separator, "ascii")
    pieces = _url_encode_impl(obj, charset, sort, key)

    if stream is None:
        return pieces  # type: ignore

    # Write the separator before every piece except the first one.
    needs_separator = False

    for piece in pieces:
        if needs_separator:
            stream.write(separator)

        stream.write(piece)
        needs_separator = True

    return None
|
||
|
|
||
|
|
||
|
def url_join(
    base: str | tuple[str, str, str, str, str],
    url: str | tuple[str, str, str, str, str],
    allow_fragments: bool = True,
) -> str:
    """Resolve a possibly relative URL against a base URL and return the
    resulting absolute form.

    If either argument is empty the other one is returned unchanged. If
    ``url`` has a different scheme than ``base`` it is returned as-is, and if
    it carries its own netloc it is only re-assembled. Otherwise the paths
    are merged and ``.`` / ``..`` segments are resolved.

    :param base: The base URL for the join operation.
    :param url: The URL to join.
    :param allow_fragments: Indicates whether fragments should be allowed.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urljoin`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_join' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.urljoin' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Tuples are turned back into strings before anything else happens.
    if isinstance(base, tuple):
        base = url_unparse(base)

    if isinstance(url, tuple):
        url = url_unparse(url)

    _check_str_tuple((base, url))
    s = _make_encode_wrapper(base)

    if not base:
        return url

    if not url:
        return base

    base_parts = url_parse(base, allow_fragments=allow_fragments)
    bscheme, bnetloc, bpath, bquery, _bfragment = base_parts
    scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments)

    if scheme != bscheme:
        return url

    if netloc:
        return url_unparse((scheme, netloc, path, query, fragment))

    netloc = bnetloc

    # Literals of the same type as the URLs being joined.
    slash = s("/")
    empty = s("")
    dot = s(".")
    dotdot = s("..")

    if not path:
        # No path in the relative URL: keep the base path, and the base query
        # too unless the relative URL brings its own.
        segments = bpath.split(slash)

        if not query:
            query = bquery
    elif path[:1] == slash:
        # Absolute path replaces the base path.
        segments = path.split(slash)
    else:
        # Relative path replaces the last segment of the base path.
        segments = bpath.split(slash)[:-1] + path.split(slash)

    # If the rightmost part is "./" we want to keep the slash but
    # remove the dot.
    if segments[-1] == dot:
        segments[-1] = empty

    # Resolve "." first, then repeatedly collapse "<name>/.." pairs that are
    # neither at the very start nor the very end of the list.
    segments = [part for part in segments if part != dot]
    collapsed = True

    while collapsed:
        collapsed = False

        for idx in range(1, len(segments) - 1):
            if segments[idx] == dotdot and segments[idx - 1] not in (empty, dotdot):
                del segments[idx - 1 : idx + 1]
                collapsed = True
                break

    # Drop any ".." that directly follows the leading empty segment, i.e. the
    # root of an absolute path.
    root_parent = [empty, dotdot]

    while segments[:2] == root_parent:
        del segments[1]

    joined_path = slash.join(segments)
    return url_unparse((scheme, netloc, joined_path, query, fragment))
|
||
|
|
||
|
|
||
|
def _urlencode(
    query: t.Mapping[str, str] | t.Iterable[tuple[str, str]], encoding: str = "utf-8"
) -> str:
    """Encode ``query`` as a query string with :func:`urllib.parse.urlencode`,
    leaving out every item whose value is ``None``.

    :param query: Mapping or iterable of pairs, expanded with
        ``iter_multi_items``.
    :param encoding: Encoding passed on to ``urlencode``.
    """
    pairs = []

    for item in iter_multi_items(query):
        if item[1] is not None:
            pairs.append(item)

    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(pairs, safe="!$'()*,/:;?@", encoding=encoding)
|